3d interp

多线程约束越大执行时间
easy
2023-07-25 10:09:50 +08:00 · 2023-07-22 03:05:04 +08:00 · 2023-07-22 02:25:30 +08:00 · 2023-07-22 01:48:24 +08:00 · 2023-07-21 22:49:07 +08:00 · 2023-07-21 17:02:29 +08:00
15 changed files with 1036 additions and 588 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -151,3 +151,4 @@ multi-language
 request_llm/moss
 media
 flagged
+objdump.tmp
--- a/config.py
+++ b/config.py
@@ -136,4 +136,8 @@ ALIYUN_APPKEY=""   # 例如 RoPlZrM88DnAFkZK


 # Claude API KEY
-ANTHROPIC_API_KEY = ""
+ANTHROPIC_API_KEY = ""
+
+
+# 自定义API KEY格式
+CUSTOM_API_KEY_PATTERN = ""
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -432,18 +432,18 @@ def get_crazy_functions():
    except:
        print('Load function plugin failed')
        
-    # try:
-    #     from crazy_functions.虚空终端 import 终端
-    #     function_plugins.update({
-    #         "超级终端": {
-    #             "Color": "stop",
-    #             "AsButton": False,
-    #             # "AdvancedArgs": True,
-    #             # "ArgsReminder": "",
-    #             "Function": HotReload(终端)
-    #         }
-    #     })
-    # except:
-    #     print('Load function plugin failed')
+    try:
+        from crazy_functions.虚空终端CodeInterpreter import 虚空终端CodeInterpreter
+        function_plugins.update({
+            "虚空终端CodeInterpreter": {
+                "Color": "stop",
+                "AsButton": True,
+                # "AdvancedArgs": True,
+                # "ArgsReminder": "",
+                "Function": HotReload(虚空终端CodeInterpreter)
+            }
+        })
+    except:
+        print('Load function plugin failed')

    return function_plugins
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -157,7 +157,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
-        from .latex_utils import Latex精细分解与转化, 编译Latex
+        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
@@ -234,7 +234,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
    try:
        import glob, os, time, subprocess
        subprocess.Popen(['pdflatex', '-version'])
-        from .latex_utils import Latex精细分解与转化, 编译Latex
+        from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
    except Exception as e:
        chatbot.append([ f"解析项目: {txt}",
            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -195,9 +195,12 @@ def test_Latex():
    # txt = r"https://arxiv.org/abs/2303.08774"
    # txt = r"https://arxiv.org/abs/2303.12712"
    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
-    txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误！
-    
-
+    # txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误！
+    # txt = "https://arxiv.org/abs/2205.14135"
+    # txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
+    # txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
+    txt = r"2210.03629"
+    txt = r"2307.04964"
    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)   #  print(cb)

@@ -225,6 +228,22 @@ def test_chatglm_finetune():
    for cookies, cb, hist, msg in (启动微调)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)

+def test_虚空终端CodeInterpreter():
+    from crazy_functions.虚空终端CodeInterpreter import 虚空终端CodeInterpreter
+    txt = 'Convert this dataset to excel.'
+    plugin_kwargs = {"recently_uploaded_files":"build/assets/iris.csv"}
+
+    for cookies, cb, hist, msg in (虚空终端CodeInterpreter)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)
+
+
+def test_解析项目源代码炫酷版():
+    from crazy_functions.解析项目源代码炫酷版 import 解析一个Python项目炫酷版
+    txt = './'
+
+    for cookies, cb, hist, msg in (解析一个Python项目炫酷版)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)
+

 if __name__ == "__main__":
    # test_解析一个Python项目()
@@ -241,6 +260,8 @@ if __name__ == "__main__":
    # test_Langchain知识库()
    # test_Langchain知识库读取()
    # test_Latex()
-    test_chatglm_finetune()
+    # test_chatglm_finetune()
+    # test_虚空终端CodeInterpreter()
+    test_解析项目源代码炫酷版()
    input("程序完成，回车退出。")
    print("退出。")
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -141,7 +141,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        chatbot, history_array, sys_prompt_array, 
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
-        retry_times_at_unknown_error=2,
+        retry_times_at_unknown_error=2, callback_fn=None
        ):
    """
    Request GPT model using multiple threads with UI and high efficiency
@@ -166,6 +166,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        handle_token_exceed：是否自动处理token溢出的情况，如果选择自动处理，则会在溢出时暴力截断，默认开启
        show_user_at_complete (bool, optional): (在结束时，把完整输入-输出结果显示在聊天框)
        retry_times_at_unknown_error：子任务失败时的重试次数
+        callback_fn: 当信息更新时，在主进程调用的回调函数

    输出 Returns:
        list: List of GPT model responses （每个子任务的输出汇总，如果某个子任务出错，response中会携带traceback报错信息，方便调试和定位问题。）
@@ -283,6 +284,9 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        # 在前端打印些好玩的东西
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始，完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
+        # 回调函数
+        if callback_fn is not None: callback_fn([mutable[thread_index][0] for thread_index in range(len(futures))])
+        # 结束了吗？
        if all(worker_done):
            executor.shutdown()
            break
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@@ -1,320 +1,16 @@
 from toolbox import update_ui, update_ui_lastest_msg    # 刷新Gradio前端界面
 from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
+from .latex_toolbox import PRESERVE, TRANSFORM
+from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
+from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
+from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
+
 import os, shutil
 import re
 import numpy as np
+
 pj = os.path.join

-"""
-========================================================================
-Part One
-Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
-========================================================================
-"""
-PRESERVE = 0
-TRANSFORM = 1
-
-def set_forbidden_text(text, mask, pattern, flags=0):
-    """
-    Add a preserve text area in this paper
-    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
-    you can mask out (mask = PRESERVE so that text become untouchable for GPT) 
-    everything between "\begin{equation}" and "\end{equation}"
-    """
-    if isinstance(pattern, list): pattern = '|'.join(pattern)
-    pattern_compile = re.compile(pattern, flags)
-    for res in pattern_compile.finditer(text):
-        mask[res.span()[0]:res.span()[1]] = PRESERVE
-    return text, mask
-
-def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
-    """
-    Move area out of preserve area (make text editable for GPT)
-    count the number of the braces so as to catch compelete text area. 
-    e.g.
-    \begin{abstract} blablablablablabla. \end{abstract} 
-    """
-    if isinstance(pattern, list): pattern = '|'.join(pattern)
-    pattern_compile = re.compile(pattern, flags)
-    for res in pattern_compile.finditer(text):
-        if not forbid_wrapper:
-            mask[res.span()[0]:res.span()[1]] = TRANSFORM
-        else:
-            mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE   # '\\begin{abstract}'
-            mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM   # abstract
-            mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE   # abstract
-    return text, mask
-
-def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
-    """
-    Add a preserve text area in this paper (text become untouchable for GPT).
-    count the number of the braces so as to catch compelete text area. 
-    e.g.
-    \caption{blablablablabla\texbf{blablabla}blablabla.} 
-    """
-    pattern_compile = re.compile(pattern, flags)
-    for res in pattern_compile.finditer(text):
-        brace_level = -1
-        p = begin = end = res.regs[0][0]
-        for _ in range(1024*16):
-            if text[p] == '}' and brace_level == 0: break
-            elif text[p] == '}':  brace_level -= 1
-            elif text[p] == '{':  brace_level += 1
-            p += 1
-        end = p+1
-        mask[begin:end] = PRESERVE
-    return text, mask
-
-def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
-    """
-    Move area out of preserve area (make text editable for GPT)
-    count the number of the braces so as to catch compelete text area. 
-    e.g.
-    \caption{blablablablabla\texbf{blablabla}blablabla.} 
-    """
-    pattern_compile = re.compile(pattern, flags)
-    for res in pattern_compile.finditer(text):
-        brace_level = 0
-        p = begin = end = res.regs[1][0]
-        for _ in range(1024*16):
-            if text[p] == '}' and brace_level == 0: break
-            elif text[p] == '}':  brace_level -= 1
-            elif text[p] == '{':  brace_level += 1
-            p += 1
-        end = p
-        mask[begin:end] = TRANSFORM
-        if forbid_wrapper:
-            mask[res.regs[0][0]:begin] = PRESERVE
-            mask[end:res.regs[0][1]] = PRESERVE
-    return text, mask
-
-def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
-    """
-    Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
-    Add it to preserve area
-    """
-    pattern_compile = re.compile(pattern, flags)
-    def search_with_line_limit(text, mask):
-        for res in pattern_compile.finditer(text):
-            cmd = res.group(1)  # begin{what}
-            this = res.group(2) # content between begin and end
-            this_mask = mask[res.regs[2][0]:res.regs[2][1]]
-            white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 
-                          'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
-            if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
-                this, this_mask = search_with_line_limit(this, this_mask)
-                mask[res.regs[2][0]:res.regs[2][1]] = this_mask
-            else:
-                mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
-        return text, mask
-    return search_with_line_limit(text, mask) 
-
-class LinkedListNode():
-    """
-    Linked List Node
-    """
-    def __init__(self, string, preserve=True) -> None:
-        self.string = string
-        self.preserve = preserve
-        self.next = None
-        # self.begin_line = 0
-        # self.begin_char = 0
-
-def convert_to_linklist(text, mask):
-    root = LinkedListNode("", preserve=True)
-    current_node = root
-    for c, m, i in zip(text, mask, range(len(text))):
-        if (m==PRESERVE and current_node.preserve) \
-            or (m==TRANSFORM and not current_node.preserve):
-            # add
-            current_node.string += c
-        else:
-            current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
-            current_node = current_node.next
-    return root
-"""
-========================================================================
-Latex Merge File
-========================================================================
-"""
-
-def 寻找Latex主文件(file_manifest, mode):
-    """
-    在多Tex文档中，寻找主文件，必须包含documentclass，返回找到的第一个。
-    P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
-    """
-    canidates = []
-    for texf in file_manifest:
-        if os.path.basename(texf).startswith('merge'):
-            continue
-        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
-            file_content = f.read()
-        if r'\documentclass' in file_content:
-            canidates.append(texf)
-        else:
-            continue
-
-    if len(canidates) == 0:
-        raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
-    elif len(canidates) == 1:
-        return canidates[0]
-    else: # if len(canidates) >= 2 通过一些Latex模板中常见（但通常不会出现在正文）的单词，对不同latex源文件扣分，取评分最高者返回
-        canidates_score = []
-        # 给出一些判定模板文档的词作为扣分项
-        unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
-        expected_words = ['\input', '\ref', '\cite']
-        for texf in canidates:
-            canidates_score.append(0)
-            with open(texf, 'r', encoding='utf8', errors='ignore') as f:
-                file_content = f.read()
-            for uw in unexpected_words:
-                if uw in file_content:
-                    canidates_score[-1] -= 1
-            for uw in expected_words:
-                if uw in file_content:
-                    canidates_score[-1] += 1
-        select = np.argmax(canidates_score) # 取评分最高者返回
-        return canidates[select]
-    
-def rm_comments(main_file):
-    new_file_remove_comment_lines = []
-    for l in main_file.splitlines():
-        # 删除整行的空注释
-        if l.lstrip().startswith("%"):
-            pass
-        else:
-            new_file_remove_comment_lines.append(l)
-    main_file = '\n'.join(new_file_remove_comment_lines)
-    # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file)  # 将 \include 命令转换为 \input 命令
-    main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # 使用正则表达式查找半行注释, 并替换为空字符串
-    return main_file
-
-def find_tex_file_ignore_case(fp):
-    dir_name = os.path.dirname(fp)
-    base_name = os.path.basename(fp)
-    if not base_name.endswith('.tex'): base_name+='.tex'
-    if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
-    # go case in-sensitive
-    import glob
-    for f in glob.glob(dir_name+'/*.tex'):
-        base_name_s = os.path.basename(fp)
-        if base_name_s.lower() == base_name.lower(): return f
-    return None
-
-def merge_tex_files_(project_foler, main_file, mode):
-    """
-    Merge Tex project recrusively
-    """
-    main_file = rm_comments(main_file)
-    for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
-        f = s.group(1)
-        fp = os.path.join(project_foler, f)
-        fp = find_tex_file_ignore_case(fp)
-        if fp:
-            with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
-        else:
-            raise RuntimeError(f'找不到{fp}，Tex源文件缺失！')
-        c = merge_tex_files_(project_foler, c, mode)
-        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
-    return main_file
-
-def merge_tex_files(project_foler, main_file, mode):
-    """
-    Merge Tex project recrusively
-    P.S. 顺便把CTEX塞进去以支持中文
-    P.S. 顺便把Latex的注释去除
-    """
-    main_file = merge_tex_files_(project_foler, main_file, mode)
-    main_file = rm_comments(main_file)
-
-    if mode == 'translate_zh':
-        # find paper documentclass
-        pattern = re.compile(r'\\documentclass.*\n')
-        match = pattern.search(main_file)
-        assert match is not None, "Cannot find documentclass statement!"
-        position = match.end()
-        add_ctex = '\\usepackage{ctex}\n'
-        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
-        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
-        # fontset=windows
-        import platform
-        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
-        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
-        # find paper abstract
-        pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
-        pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
-        match_opt1 = pattern_opt1.search(main_file)
-        match_opt2 = pattern_opt2.search(main_file)
-        assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
-    return main_file
-
-
-
-"""
-========================================================================
-Post process
-========================================================================
-"""
-def mod_inbraket(match):
-    """
-    为啥chatgpt会把cite里面的逗号换成中文逗号呀 
-    """
-    # get the matched string
-    cmd = match.group(1)
-    str_to_modify = match.group(2)
-    # modify the matched string
-    str_to_modify = str_to_modify.replace('：', ':')    # 前面是中文冒号，后面是英文冒号
-    str_to_modify = str_to_modify.replace('，', ',')    # 前面是中文逗号，后面是英文逗号
-    # str_to_modify = 'BOOM'
-    return "\\" + cmd + "{" + str_to_modify + "}"
-
-def fix_content(final_tex, node_string):
-    """
-    Fix common GPT errors to increase success rate
-    """
-    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
-    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
-    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
-    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
-
-    if "Traceback" in final_tex and "[Local Message]" in final_tex:
-        final_tex = node_string # 出问题了，还原原文
-    if node_string.count('\\begin') != final_tex.count('\\begin'):
-        final_tex = node_string # 出问题了，还原原文
-    if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
-        # walk and replace any _ without \
-        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
-
-    def compute_brace_level(string):
-        # this function count the number of { and }
-        brace_level = 0
-        for c in string:
-            if c == "{": brace_level += 1
-            elif c == "}": brace_level -= 1
-        return brace_level
-    def join_most(tex_t, tex_o):
-        # this function join translated string and original string when something goes wrong
-        p_t = 0
-        p_o = 0
-        def find_next(string, chars, begin):
-            p = begin
-            while p < len(string):
-                if string[p] in chars: return p, string[p]
-                p += 1
-            return None, None
-        while True:
-            res1, char = find_next(tex_o, ['{','}'], p_o)
-            if res1 is None: break
-            res2, char = find_next(tex_t, [char], p_t)
-            if res2 is None: break
-            p_o = res1 + 1
-            p_t = res2 + 1
-        return tex_t[:p_t] + tex_o[p_o:]
-
-    if compute_brace_level(final_tex) != compute_brace_level(node_string):
-        # 出问题了，还原部分原文，保证括号正确
-        final_tex = join_most(final_tex, node_string)
-    return final_tex

 def split_subprocess(txt, project_folder, return_dict, opts):
    """
@@ -356,77 +52,9 @@ def split_subprocess(txt, project_folder, return_dict, opts):
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)

-    # 修复括号
-    node = root
-    while True:
-        string = node.string
-        if node.preserve: 
-            node = node.next
-            if node is None: break
-            continue
-        def break_check(string):
-            str_stack = [""] # (lv, index)
-            for i, c in enumerate(string):
-                if c == '{':
-                    str_stack.append('{')
-                elif c == '}':
-                    if len(str_stack) == 1:
-                        print('stack fix')
-                        return i
-                    str_stack.pop(-1)
-                else:
-                    str_stack[-1] += c
-            return -1
-        bp = break_check(string)
+    # 最后一步处理，增强稳健性
+    root = post_process(root)

-        if bp == -1:
-            pass
-        elif bp == 0:
-            node.string = string[:1]
-            q = LinkedListNode(string[1:], False)
-            q.next = node.next
-            node.next = q
-        else:
-            node.string = string[:bp]
-            q = LinkedListNode(string[bp:], False)
-            q.next = node.next
-            node.next = q
-
-        node = node.next
-        if node is None: break
-
-    # 屏蔽空行和太短的句子
-    node = root
-    while True:
-        if len(node.string.strip('\n').strip(''))==0: node.preserve = True
-        if len(node.string.strip('\n').strip(''))<42: node.preserve = True
-        node = node.next
-        if node is None: break
-    node = root
-    while True:
-        if node.next and node.preserve and node.next.preserve:
-            node.string += node.next.string
-            node.next = node.next.next
-        node = node.next
-        if node is None: break
-
-    # 将前后断行符脱离
-    node = root
-    prev_node = None
-    while True:
-        if not node.preserve:
-            lstriped_ = node.string.lstrip().lstrip('\n')
-            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
-                prev_node.string += node.string[:-len(lstriped_)]
-                node.string = lstriped_
-            rstriped_ = node.string.rstrip().rstrip('\n')
-            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
-                node.next.string = node.string[len(rstriped_):] + node.next.string
-                node.string = rstriped_
-        # =====
-        prev_node = node
-        node = node.next
-        if node is None: break
    # 输出html调试文件，用红色标注处保留区（PRESERVE），用黑色标注转换区（TRANSFORM）
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
@@ -437,7 +65,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
            show_html = node.string.replace('\n','<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
-                f.write(f'<p style="color:black;">#{show_html}#</p>')
+                f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
@@ -448,8 +76,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict

-
-
 class LatexPaperSplit():
    """
    break down latex file to a linked list,
@@ -464,18 +90,32 @@ class LatexPaperSplit():
        # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响，禁止移除或修改此警告。}}\\\\" 

-    def merge_result(self, arr, mode, msg):
+
+    def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
        """
        Merge the result after the GPT process completed
        """
        result_string = ""
-        p = 0
+        node_cnt = 0
+        line_cnt = 0
+        
        for node in self.nodes:
            if node.preserve:
+                line_cnt += node.string.count('\n')
                result_string += node.string
            else:
-                result_string += fix_content(arr[p], node.string)
-                p += 1
+                translated_txt = fix_content(arr[node_cnt], node.string)
+                begin_line = line_cnt
+                end_line = line_cnt + translated_txt.count('\n')
+
+                # reverse translation if any error
+                if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
+                    translated_txt = node.string
+
+                result_string += translated_txt
+                node_cnt += 1
+                line_cnt += translated_txt.count('\n')
+
        if mode == 'translate_zh':
            pattern = re.compile(r'\\begin\{abstract\}.*\n')
            match = pattern.search(result_string)
@@ -490,6 +130,7 @@ class LatexPaperSplit():
            result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
        return result_string

+
    def split(self, txt, project_folder, opts): 
        """
        break down latex file to a linked list,
@@ -511,7 +152,6 @@ class LatexPaperSplit():
        return self.sp


-
 class LatexPaperFileGroup():
    """
    use tokenizer to break down text according to max_token_limit
@@ -539,7 +179,7 @@ class LatexPaperFileGroup():
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
-                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+                from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
@@ -560,41 +200,14 @@ class LatexPaperFileGroup():
                f.write(res)
        return manifest

-def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
-
-    # write html
-    try:
-        import shutil
-        from .crazy_utils import construct_html
-        from toolbox import gen_time_str
-        ch = construct_html() 
-        orig = ""
-        trans = ""
-        final = []
-        for c,r in zip(sp_file_contents, sp_file_result): 
-            final.append(c)
-            final.append(r)
-        for i, k in enumerate(final): 
-            if i%2==0:
-                orig = k
-            if i%2==1:
-                trans = k
-                ch.add_row(a=orig, b=trans)
-        create_report_file_name = f"{gen_time_str()}.trans.html"
-        ch.save_file(create_report_file_name)
-        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
-        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
-    except:
-        from toolbox import trimmed_format_exc
-        print('writing html result failed:', trimmed_format_exc())

 def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
    import time, os, re
-    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
-    from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
+    from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from .latex_actions import LatexPaperFileGroup, LatexPaperSplit

    #  <-------- 寻找主tex文件 ----------> 
-    maintex = 寻找Latex主文件(file_manifest, mode)
+    maintex = find_main_tex_file(file_manifest, mode)
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果：该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
    time.sleep(3)
@@ -668,54 +281,51 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
    #  <-------- 写出文件 ----------> 
    msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
+    objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
+
    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
        

    #  <-------- 整理结果, 退出 ----------> 
-    chatbot.append((f"完成了吗？", 'GPT结果已输出, 正在编译PDF'))
+    chatbot.append((f"完成了吗？", 'GPT结果已输出, 即将编译PDF'))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    #  <-------- 返回 ----------> 
    return project_folder + f'/merge_{mode}.tex'


-
-def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
+def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]):
    try:
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            log = f.read()
-        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-            file_lines = f.readlines()
        import re
        buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
        buggy_lines = [int(l) for l in buggy_lines]
        buggy_lines = sorted(buggy_lines)
-        print("removing lines that has errors", buggy_lines)
-        file_lines.pop(buggy_lines[0]-1)
+        buggy_line = buggy_lines[0]-1
+        print("reversing tex line that has errors", buggy_line)
+
+        # 重组，逆转出错的段落
+        if buggy_line not in fixed_line:
+            fixed_line.append(buggy_line)
+
+        lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
+        final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)
+
        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
-            f.writelines(file_lines)
+            f.write(final_tex)
+
        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
    except:
        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
        return False, -1, [-1]
-    
-def compile_latex_with_timeout(command, cwd, timeout=60):
-    import subprocess
-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
-    try:
-        stdout, stderr = process.communicate(timeout=timeout)
-    except subprocess.TimeoutExpired:
-        process.kill()
-        stdout, stderr = process.communicate()
-        print("Process timed out!")
-        return False
-    return True
+

 def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
    import os, time
-    current_dir = os.getcwd()
    n_fix = 1
+    fixed_line = []
    max_try = 32
    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}，如果程序停顿5分钟以上，请直接去该路径下取回翻译结果，或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
    chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
@@ -723,6 +333,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f

    while True:
        import os
+        may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
+        target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
+        if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
+            shutil.copyfile(may_exist_bbl, target_bbl)

        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
@ -756,7 +370,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)

-
        # <---------- 检查结果 ----------->
        results_ = ""
        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
@ -780,10 +393,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
            # 将两个PDF拼接
            if original_pdf_success: 
                try:
+                    from .latex_toolbox import merge_pdfs
                    concat_pdf = pj(work_folder_modified, f'comparison.pdf')
                    merge_pdfs(origin_pdf, result_pdf, concat_pdf)
                    promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
-                except:
+                except Exception as e:
                    pass
            return True # 成功啦
        else:
@ -796,6 +410,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                tex_name_pure=f'{main_file_modified}',
                n_fix=n_fix,
                work_folder_modified=work_folder_modified,
+                fixed_line=fixed_line
            )
            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
            if not can_retry: break
@ -803,38 +418,29 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
    return False # 失败啦


-def merge_pdfs(pdf1_path, pdf2_path, output_path):
-    import PyPDF2
-    # Open the first PDF file
-    with open(pdf1_path, 'rb') as pdf1_file:
-        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
-        # Open the second PDF file
-        with open(pdf2_path, 'rb') as pdf2_file:
-            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
-            # Create a new PDF file to store the merged pages
-            output_writer = PyPDF2.PdfFileWriter()
-            # Determine the number of pages in each PDF file
-            num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
-            # Merge the pages from the two PDF files
-            for page_num in range(num_pages):
-                # Add the page from the first PDF file
-                if page_num < pdf1_reader.numPages:
-                    page1 = pdf1_reader.getPage(page_num)
-                else:
-                    page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
-                # Add the page from the second PDF file
-                if page_num < pdf2_reader.numPages:
-                    page2 = pdf2_reader.getPage(page_num)
-                else:
-                    page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
-                # Create a new empty page with double width
-                new_page = PyPDF2.PageObject.createBlankPage(
-                    width=2 * max(page1.mediaBox.getWidth(), page2.mediaBox.getWidth()),
-                    height=max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
-                )
-                new_page.mergeTranslatedPage(page1, 0, 0)
-                new_page.mergeTranslatedPage(page2, page1.mediaBox.getWidth(), 0)
-                output_writer.addPage(new_page)
-            # Save the merged PDF file
-            with open(output_path, 'wb') as output_file:
-                output_writer.write(output_file)
+def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
+    """
+    Write a two-column HTML report pairing each source fragment with its result.
+
+    Fragments are paired positionally with zip, so surplus items in the longer
+    sequence are ignored. After the report is saved, the code copies it from
+    ./gpt_log/ into project_folder and promotes it to the download zone.
+    Best-effort: any failure is printed and swallowed, nothing is raised.
+    """
+    try:
+        import shutil
+        from ..crazy_utils import construct_html
+        from toolbox import gen_time_str
+        report = construct_html()
+        for source_text, translated_text in zip(sp_file_contents, sp_file_result):
+            report.add_row(a=source_text, b=translated_text)
+        report_name = f"{gen_time_str()}.trans.html"
+        report.save_file(report_name)
+        saved_report = pj('./gpt_log/', report_name)
+        shutil.copyfile(saved_report, pj(project_folder, report_name))
+        promote_file_to_downloadzone(file=f'./gpt_log/{report_name}', chatbot=chatbot)
+    except:
+        from toolbox import trimmed_format_exc
+        print('writing html result failed:', trimmed_format_exc())
--- a/crazy_functions/latex_fns/latex_toolbox.py
+++ b/crazy_functions/latex_fns/latex_toolbox.py
@ -0,0 +1,456 @@
+import os, shutil
+import re
+import numpy as np
+PRESERVE = 0
+TRANSFORM = 1
+
+pj = os.path.join
+
+class LinkedListNode:
+    """
+    One segment of a document stored as a singly linked list.
+
+    string   -- the text held by this segment
+    preserve -- True when the segment must be left untouched
+    next     -- the following node, or None at the tail
+    range    -- line range of the segment; None until it is annotated
+    """
+    def __init__(self, string, preserve=True) -> None:
+        self.string, self.preserve = string, preserve
+        self.next = self.range = None
+
+def convert_to_linklist(text, mask):
+    """
+    Split text into a linked list of runs that share the same mask value.
+
+    The list starts with an empty preserve node; each character is appended to
+    the current node while its mask flag agrees with that node, otherwise a new
+    node is opened. Returns the head node.
+    """
+    head = LinkedListNode("", preserve=True)
+    tail = head
+    for char, flag in zip(text, mask):
+        same_run = (flag == PRESERVE and tail.preserve) \
+            or (flag == TRANSFORM and not tail.preserve)
+        if not same_run:
+            # the flag flipped: start a new segment with this character
+            tail.next = LinkedListNode(char, preserve=(flag == PRESERVE))
+            tail = tail.next
+            continue
+        tail.string += char
+    return head
+
+def post_process(root):
+    """
+    Clean up a segment linked list in four in-place passes and return its root.
+
+    1. Split every non-preserve node at its first unmatched '}'.
+    2. Mark short nodes as preserve, then merge neighbouring preserve nodes.
+    3. Move leading/trailing whitespace of non-preserve nodes into the
+       adjacent preserve nodes.
+    4. Store in node.range the line span of each node, widened by `expansion`.
+    """
+    # Pass 1: fix braces
+    node = root
+    while True:
+        string = node.string
+        if node.preserve: 
+            node = node.next
+            if node is None: break
+            continue
+        def break_check(string):
+            # Return the index of the first '}' that has no '{' before it, or -1 if there is none.
+            str_stack = [""] # (lv, index)
+            for i, c in enumerate(string):
+                if c == '{':
+                    str_stack.append('{')
+                elif c == '}':
+                    if len(str_stack) == 1:
+                        print('stack fix')
+                        return i
+                    str_stack.pop(-1)
+                else:
+                    str_stack[-1] += c
+            return -1
+        bp = break_check(string)
+
+        if bp == -1:
+            pass
+        elif bp == 0:
+            # the stray '}' is the first character: keep it alone in this node
+            node.string = string[:1]
+            q = LinkedListNode(string[1:], False)
+            q.next = node.next
+            node.next = q
+        else:
+            # cut just before the stray '}'; the new node starts with it
+            node.string = string[:bp]
+            q = LinkedListNode(string[bp:], False)
+            q.next = node.next
+            node.next = q
+
+        node = node.next
+        if node is None: break
+
+    # Pass 2: mask out blank lines and sentences that are too short
+    node = root
+    while True:
+        # NOTE(review): .strip('') strips nothing, and the ==0 test is already covered by the <42 test
+        if len(node.string.strip('\n').strip(''))==0: node.preserve = True
+        if len(node.string.strip('\n').strip(''))<42: node.preserve = True
+        node = node.next
+        if node is None: break
+    node = root
+    while True:
+        # NOTE(review): the cursor advances after each merge, so a run of three
+        # preserve nodes is not collapsed into one -- confirm this is intended
+        if node.next and node.preserve and node.next.preserve:
+            node.string += node.next.string
+            node.next = node.next.next
+        node = node.next
+        if node is None: break
+
+    # Pass 3: detach leading and trailing line breaks from non-preserve nodes
+    node = root
+    prev_node = None
+    while True:
+        if not node.preserve:
+            lstriped_ = node.string.lstrip().lstrip('\n')
+            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
+                # NOTE(review): when lstriped_ is empty, [:-0] yields '' and the whitespace is dropped
+                prev_node.string += node.string[:-len(lstriped_)]
+                node.string = lstriped_
+            rstriped_ = node.string.rstrip().rstrip('\n')
+            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
+                node.next.string = node.string[len(rstriped_):] + node.next.string
+                node.string = rstriped_
+        # =====
+        prev_node = node
+        node = node.next
+        if node is None: break
+
+    # Pass 4: annotate each node with its line range
+    node = root
+    n_line = 0
+    expansion = 2
+    while True:
+        n_l = node.string.count('\n')
+        node.range = [n_line-expansion, n_line+n_l+expansion]   # range to revert when a fix fails
+        n_line = n_line+n_l
+        node = node.next
+        if node is None: break
+    return root
+
+
+"""
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+"""
+
+
+def set_forbidden_text(text, mask, pattern, flags=0):
+    r"""
+    Mark every regex match in text as PRESERVE (untouchable for GPT).
+
+    pattern may be one regex string or a list of them (joined with '|').
+    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" the
+    whole algorithm environment, wrapper included, is masked out.
+    mask is modified in place by slice assignment and returned with text.
+    """
+    regex_source = '|'.join(pattern) if isinstance(pattern, list) else pattern
+    for found in re.finditer(regex_source, text, flags):
+        start, stop = found.span()
+        mask[start:stop] = PRESERVE
+    return text, mask
+
+def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
+    r"""
+    Move matched areas out of the preserve area (make text editable for GPT).
+
+    pattern may be one regex string or a list of them (joined with '|').
+    With forbid_wrapper=True only capture group 1 becomes TRANSFORM while the
+    rest of the match, e.g. the \begin{abstract} / \end{abstract} wrapper,
+    is set to PRESERVE. With forbid_wrapper=False the whole match becomes
+    TRANSFORM. mask is modified in place and returned with text.
+    """
+    regex_source = '|'.join(pattern) if isinstance(pattern, list) else pattern
+    for found in re.finditer(regex_source, text, flags):
+        whole_start, whole_stop = found.span()
+        if forbid_wrapper:
+            inner_start, inner_stop = found.span(1)
+            mask[whole_start:inner_start] = PRESERVE    # opening wrapper
+            mask[inner_start:inner_stop] = TRANSFORM    # editable content
+            mask[inner_stop:whole_stop] = PRESERVE      # closing wrapper
+        else:
+            mask[whole_start:whole_stop] = TRANSFORM
+    return text, mask
+
+def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
+    r"""
+    Add a preserve text area in this paper (text becomes untouchable for GPT).
+
+    Braces are counted from the start of each match so that the complete
+    command is captured even when it contains nested braces, e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
+
+    The scan covers at most 1024*16 characters and never runs past the end of
+    text: with unbalanced braces everything scanned so far is preserved
+    instead of raising IndexError. mask is modified in place and returned
+    together with text.
+    """
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        # -1 because the opening '{' of the command itself is still ahead
+        brace_level = -1
+        begin = res.regs[0][0]
+        scan_limit = min(len(text), begin + 1024*16)
+        p = begin
+        while p < scan_limit:
+            ch = text[p]
+            if ch == '}':
+                if brace_level == 0: break
+                brace_level -= 1
+            elif ch == '{':
+                brace_level += 1
+            p += 1
+        end = p+1   # include the closing '}'
+        mask[begin:end] = PRESERVE
+    return text, mask
+
+def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
+    r"""
+    Move area out of preserve area (make text editable for GPT).
+
+    Starting at capture group 1, braces are counted to find the '}' that
+    closes the command, so nested braces stay inside the editable area, e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
+
+    With forbid_wrapper=True the parts of the match before and after the
+    editable area are set to PRESERVE. The scan covers at most 1024*16
+    characters and never runs past the end of text, so unbalanced braces no
+    longer raise IndexError. mask is modified in place and returned with text.
+    """
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        brace_level = 0
+        begin = res.regs[1][0]
+        scan_limit = min(len(text), begin + 1024*16)
+        p = begin
+        while p < scan_limit:
+            ch = text[p]
+            if ch == '}':
+                if brace_level == 0: break
+                brace_level -= 1
+            elif ch == '{':
+                brace_level += 1
+            p += 1
+        end = p     # the closing '}' itself stays outside the editable area
+        mask[begin:end] = TRANSFORM
+        if forbid_wrapper:
+            mask[res.regs[0][0]:begin] = PRESERVE
+            mask[end:res.regs[0][1]] = PRESERVE
+    return text, mask
+
+def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
+    r"""
+    Preserve every matched \begin{} ... \end{} block shorter than limit_n_lines.
+
+    pattern must expose the environment name as group 1 and the content
+    between begin and end as group 2. Blocks whose name is in the whitelist,
+    or whose content has at least limit_n_lines line breaks, are not masked
+    as a whole; their content is searched recursively instead.
+    mask is modified in place and returned together with text.
+    """
+    compiled = re.compile(pattern, flags)
+    recurse_into = ('document', 'abstract', 'lemma', 'definition', 'sproof',
+                    'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate')
+
+    def scan(segment, segment_mask):
+        for hit in compiled.finditer(segment):
+            env_name, body = hit.group(1), hit.group(2)
+            body_start, body_stop = hit.span(2)
+            if env_name not in recurse_into and body.count('\n') < limit_n_lines:
+                # small, non-whitelisted block: freeze it, wrapper included
+                block_start, block_stop = hit.span()
+                segment_mask[block_start:block_stop] = PRESERVE
+                continue
+            _, inner_mask = scan(body, segment_mask[body_start:body_stop])
+            segment_mask[body_start:body_stop] = inner_mask
+        return segment, segment_mask
+
+    return scan(text, mask)
+
+
+
+"""
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Latex Merge File
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+"""
+
+def find_main_tex_file(file_manifest, mode):
+    r"""
+    Find the main file of a multi-file Tex project.
+
+    A candidate must contain \documentclass; files whose name starts with
+    'merge' are skipped. With several candidates, each one loses a point for
+    every word typical of Latex templates and gains a point for every word
+    typical of a real paper body; the first candidate with the highest score
+    is returned.
+
+    file_manifest -- iterable of paths to .tex files
+    mode          -- not used by this function
+    Raises RuntimeError when no file contains \documentclass.
+    """
+    candidates = []     # (path, content) pairs
+    for texf in file_manifest:
+        if os.path.basename(texf).startswith('merge'):
+            continue
+        with open(texf, 'r', encoding='utf8', errors='ignore') as f:
+            file_content = f.read()
+        if r'\documentclass' in file_content:
+            candidates.append((texf, file_content))
+
+    if not candidates:
+        raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
+    if len(candidates) == 1:
+        return candidates[0][0]
+
+    # Raw strings are required here: in a normal literal '\ref' is a carriage
+    # return followed by "ef" and would never match.
+    unexpected_words = [r'\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+    expected_words = [r'\input', r'\ref', r'\cite']
+    scores = []
+    for _, file_content in candidates:
+        penalty = sum(1 for word in unexpected_words if word in file_content)
+        bonus = sum(1 for word in expected_words if word in file_content)
+        scores.append(bonus - penalty)
+    # index() returns the first occurrence of the best score
+    return candidates[scores.index(max(scores))][0]
+    
+def rm_comments(main_file):
+    """
+    Strip Latex comments from a source string.
+
+    Lines whose first non-blank character is '%' are removed entirely; on the
+    remaining lines everything from an unescaped '%' to the end of the line is
+    cut. An escaped percent sign is kept.
+    """
+    kept_lines = [line for line in main_file.splitlines() if not line.lstrip().startswith("%")]
+    without_comment_lines = '\n'.join(kept_lines)
+    # drop trailing (half-line) comments
+    return re.sub(r'(?<!\\)%.*', '', without_comment_lines)
+
+def find_tex_file_ignore_case(fp):
+    """
+    Resolve a Tex file path, falling back to a case-insensitive name match.
+
+    '.tex' is appended when the name does not already end with it. The exact
+    path wins when it exists; otherwise the .tex files of the same directory
+    are compared by lower-cased name. Returns the path found, or None.
+    """
+    import glob
+    dir_name = os.path.dirname(fp)
+    base_name = os.path.basename(fp)
+    if not base_name.endswith('.tex'): base_name += '.tex'
+    exact_path = os.path.join(dir_name, base_name)
+    if os.path.exists(exact_path): return exact_path
+    # go case in-sensitive: compare each file found on disk, not the request itself
+    wanted = base_name.lower()
+    # join + escape: an empty dir_name must search the current directory, not '/'
+    for candidate in glob.glob(os.path.join(glob.escape(dir_name), '*.tex')):
+        if os.path.basename(candidate).lower() == wanted: return candidate
+    return None
+
+def merge_tex_files_(project_foler, main_file, mode):
+    r"""
+    Merge Tex project recursively.
+
+    Comments are removed from main_file, then every \input{...} is replaced by
+    the (recursively merged) content of the referenced file, looked up under
+    project_foler with a case-insensitive fallback.
+
+    Raises RuntimeError naming the requested path when a file is missing.
+    """
+    main_file = rm_comments(main_file)
+    # walk the matches back to front so earlier spans stay valid while splicing
+    for s in reversed(list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))):
+        requested = os.path.join(project_foler, s.group(1))
+        resolved = find_tex_file_ignore_case(requested)
+        if not resolved:
+            # report the path that was asked for; the lookup result is None here
+            raise RuntimeError(f'找不到{requested}，Tex源文件缺失！')
+        with open(resolved, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
+        c = merge_tex_files_(project_foler, c, mode)
+        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
+    return main_file
+
+def merge_tex_files(project_foler, main_file, mode):
+    r"""
+    Merge Tex project recursively and strip Latex comments.
+
+    When mode == 'translate_zh' the merged source is also prepared for Chinese
+    output: \usepackage{ctex} (and \usepackage{url} unless '{url}' already
+    appears) is inserted after the \documentclass line, and
+    fontset=windows,UTF8 is added to the \documentclass options.
+
+    Raises AssertionError in that mode when no \documentclass statement or no
+    abstract (\begin{abstract} or \abstract{...}) can be found.
+    """
+    main_file = merge_tex_files_(project_foler, main_file, mode)
+    main_file = rm_comments(main_file)
+
+    if mode == 'translate_zh':
+        # find paper documentclass
+        match = re.search(r'\\documentclass.*\n', main_file)
+        assert match is not None, "Cannot find documentclass statement!"
+        position = match.end()
+        add_ctex = '\\usepackage{ctex}\n'
+        add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
+        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
+        # fontset=windows: the first substitution handles a documentclass that
+        # already has options, the second one handles a documentclass without any
+        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
+        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
+        # find paper abstract
+        match_opt1 = re.search(r'\\begin\{abstract\}.*\n', main_file)
+        match_opt2 = re.search(r"\\abstract\{(.*?)\}", main_file, flags=re.DOTALL)
+        assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
+    return main_file
+
+
+"""
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+Post process
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
+"""
+def mod_inbraket(match):
+    """
+    re.sub callback that repairs punctuation inside a command argument.
+
+    Group 1 of match is the command name and group 2 its argument. Full-width
+    colons and commas in the argument are replaced by their ASCII forms, and
+    the command is rebuilt as backslash + name + {argument}.
+    """
+    command, argument = match.group(1), match.group(2)
+    for full_width, ascii_char in (('：', ':'), ('，', ',')):
+        argument = argument.replace(full_width, ascii_char)
+    return ''.join(("\\", command, "{", argument, "}"))
+
+def fix_content(final_tex, node_string):
+    r"""
+    Fix common GPT errors to increase success rate.
+
+    final_tex   -- the text returned by the model for one segment
+    node_string -- the original text of that segment
+
+    The repairs: escape bare '%'; remove a stray space between a command and
+    its '{' or after the backslash; restore ASCII punctuation inside command
+    arguments; fall back to the original when the reply carries an error
+    trace or a different number of \begin; re-escape '_' when escaped
+    underscores were lost; and, when the net brace count differs, keep the
+    translated prefix and finish with the original text.
+    """
+    final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
+    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
+    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
+    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
+
+    if "Traceback" in final_tex and "[Local Message]" in final_tex:
+        final_tex = node_string # something went wrong, restore the original text
+    if node_string.count('\\begin') != final_tex.count('\\begin'):
+        final_tex = node_string # something went wrong, restore the original text
+    # raw strings: a plain '\_' literal is an invalid escape sequence
+    if node_string.count(r'\_') > 0 and node_string.count(r'\_') > final_tex.count(r'\_'):
+        # walk and replace any _ without \
+        final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
+
+    def compute_brace_level(string):
+        # net brace balance: number of '{' minus number of '}'
+        brace_level = 0
+        for c in string:
+            if c == "{": brace_level += 1
+            elif c == "}": brace_level -= 1
+        return brace_level
+    def join_most(tex_t, tex_o):
+        # join translated string and original string when something goes wrong:
+        # follow the braces of the original one by one, find each in the
+        # translation, and splice where the translation runs out of them
+        p_t = 0
+        p_o = 0
+        def find_next(string, chars, begin):
+            # position and value of the first character from chars at or after begin
+            p = begin
+            while p < len(string):
+                if string[p] in chars: return p, string[p]
+                p += 1
+            return None, None
+        while True:
+            res1, char = find_next(tex_o, ['{','}'], p_o)
+            if res1 is None: break
+            res2, char = find_next(tex_t, [char], p_t)
+            if res2 is None: break
+            p_o = res1 + 1
+            p_t = res2 + 1
+        return tex_t[:p_t] + tex_o[p_o:]
+
+    if compute_brace_level(final_tex) != compute_brace_level(node_string):
+        # something went wrong, restore part of the original to keep braces correct
+        final_tex = join_most(final_tex, node_string)
+    return final_tex
+    
+def compile_latex_with_timeout(command, cwd, timeout=60):
+    """
+    Run a shell command in cwd and report whether it ended within timeout.
+
+    The command's output is captured and discarded. Returns True when the
+    process finishes in time, whatever its exit code. On timeout the process
+    is killed, "Process timed out!" is printed and False is returned.
+    """
+    import subprocess
+    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
+    try:
+        proc.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+        proc.communicate()  # collect the killed process
+        print("Process timed out!")
+        return False
+    else:
+        return True
+
+
+
+def merge_pdfs(pdf1_path, pdf2_path, output_path):
+    """
+    Combine two PDFs page by page into one side-by-side comparison PDF.
+
+    Page i of the output holds page i of pdf1 at the left edge and page i of
+    pdf2 to its right. The shorter document is padded with blank pages.
+    The result is written to output_path.
+    """
+    import PyPDF2
+    # Fraction of page2's width added to the output page width (see width below)
+    Percent = 0.8
+    # Open the first PDF file
+    with open(pdf1_path, 'rb') as pdf1_file:
+        pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
+        # Open the second PDF file
+        with open(pdf2_path, 'rb') as pdf2_file:
+            pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
+            # Create a new PDF file to store the merged pages
+            output_writer = PyPDF2.PdfFileWriter()
+            # Determine the number of pages in each PDF file
+            num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
+            # Merge the pages from the two PDF files
+            for page_num in range(num_pages):
+                # Add the page from the first PDF file
+                if page_num < pdf1_reader.numPages:
+                    page1 = pdf1_reader.getPage(page_num)
+                else:
+                    page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
+                # Add the page from the second PDF file
+                if page_num < pdf2_reader.numPages:
+                    page2 = pdf2_reader.getPage(page_num)
+                else:
+                    # NOTE(review): the blank filler for pdf2 is created from pdf1_reader -- confirm this is intended
+                    page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
+                # Create a new empty page: width is page1 plus Percent of page2, height is the taller of the two
+                new_page = PyPDF2.PageObject.createBlankPage(
+                    width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
+                    height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
+                )
+                new_page.mergeTranslatedPage(page1, 0, 0)
+                # page2 is shifted left by (1-Percent) of its width, so it starts inside page1's right edge
+                new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
+                output_writer.addPage(new_page)
+            # Save the merged PDF file
+            with open(output_path, 'wb') as output_file:
+                output_writer.write(output_file)
--- a/crazy_functions/虚空终端.py
+++ b/crazy_functions/虚空终端.py
@ -1,87 +1,70 @@
 from toolbox import CatchException, update_ui, gen_time_str
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from .crazy_utils import input_clipping
+import copy, json

-
-prompt = """
-I have to achieve some functionalities by calling one of the functions below.
-Your job is to find the correct funtion to use to satisfy my requirement,
-and then write python code to call this function with correct parameters.
-
-These are functions you are allowed to choose from:
-1. 
-    功能描述: 总结音视频内容
-    调用函数: ConcludeAudioContent(txt, llm_kwargs)
-    参数说明: 
-            txt: 音频文件的路径
-            llm_kwargs: 模型参数, 永远给定None
-2. 
-    功能描述: 将每次对话记录写入Markdown格式的文件中
-    调用函数: WriteMarkdown()
-3.
-    功能描述: 将指定目录下的PDF文件从英文翻译成中文
-    调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs)
-    参数说明: 
-            txt: PDF文件所在的路径
-            llm_kwargs: 模型参数, 永远给定None
-4.
-    功能描述: 根据文本使用GPT模型生成相应的图像
-    调用函数: ImageGeneration(txt, llm_kwargs)
-    参数说明: 
-            txt: 图像生成所用到的提示文本
-            llm_kwargs: 模型参数, 永远给定None
-5.
-    功能描述: 对输入的word文档进行摘要生成 
-    调用函数: SummarizingWordDocuments(input_path, output_path)
-    参数说明: 
-            input_path: 待处理的word文档路径
-            output_path: 摘要生成后的文档路径
-
-
-You should always anwser with following format:
----------------
-Code:
-```
-class AutoAcademic(object):
-    def __init__(self):
-        self.selected_function = "FILL_CORRECT_FUNCTION_HERE"      # e.g., "GenerateImage"
-        self.txt = "FILL_MAIN_PARAMETER_HERE"      # e.g., "荷叶上的蜻蜓"
-        self.llm_kwargs = None
-```
-Explanation:
-只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。
----------------
-
-Now, this is my requirement: 
-
-"""
 def get_fn_lib():
    return {
-        "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程",  "批量翻译PDF文档"),
-        "SummarizingWordDocuments": ("crazy_functions.总结word文档",  "总结word文档"),
-        "ImageGeneration": ("crazy_functions.图片生成",  "图片生成"),
-        "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译",  "Markdown中译英"),
-        "SummaryAudioVideo": ("crazy_functions.总结音视频",  "总结音视频"),
+        "BatchTranslatePDFDocuments_MultiThreaded": {
+                "module": "crazy_functions.批量翻译PDF文档_多线程",  
+                "function": "批量翻译PDF文档",
+                "description": "Translate PDF Documents",
+                "arg_1_description": "A path containing pdf files.",
+            },
+        "SummarizingWordDocuments": {
+                "module": "crazy_functions.总结word文档",  
+                "function": "总结word文档",
+                "description": "Summarize Word Documents",
+                "arg_1_description": "A path containing Word files.",
+            },
+        "ImageGeneration": {
+                "module": "crazy_functions.图片生成",  
+                "function": "图片生成",
+                "description": "Generate a image that satisfies some description.",
+                "arg_1_description": "Descriptions about the image to be generated.",
+            },
+        "TranslateMarkdownFromEnglishToChinese": {
+                "module": "crazy_functions.批量Markdown翻译",  
+                "function": "Markdown中译英",
+                "description": "Translate Markdown Documents from English to Chinese.",
+                "arg_1_description": "A path containing Markdown files.",
+            },
+        "SummaryAudioVideo": {
+                "module": "crazy_functions.总结音视频",  
+                "function": "总结音视频",
+                "description": "Get text from a piece of audio and summarize this audio.",
+                "arg_1_description": "A path containing audio files.",
+            },
    }

+# One function description per entry of get_fn_lib(), built once at import time.
+# Every function takes a single required string parameter, "plugin_arg_1".
+functions = [
+    {
+        "name": k,
+        "description": v['description'],
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "plugin_arg_1": {
+                    "type": "string",
+                    "description": v['arg_1_description'],
+                },
+            },
+            "required": ["plugin_arg_1"],
+        },
+    } for k, v in get_fn_lib().items()
+]
+
 def inspect_dependency(chatbot, history):
    return True

 def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    import subprocess, sys, os, shutil, importlib
-
-    with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f:
-        f.write(code)
-
+    import importlib
    try:
-        AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic')
-        # importlib.reload(AutoAcademic)
-        auto_dict = AutoAcademic()
-        selected_function = auto_dict.selected_function
-        txt = auto_dict.txt
-        fp, fn = get_fn_lib()[selected_function]
+        tmp = get_fn_lib()[code['name']]
+        fp, fn = tmp['module'], tmp['function']
        fn_plugin = getattr(importlib.import_module(fp, fn), fn)
-        yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
+        arg = json.loads(code['arguments'])['plugin_arg_1']
+        yield from fn_plugin(arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
    except:
        from toolbox import trimmed_format_exc
        chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"])
@ -110,22 +93,27 @@ def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_
    history = []    

    # 基本信息：功能、贡献者
-    chatbot.append(["函数插件功能？", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."])
+    chatbot.append(["虚空终端插件的功能？", "根据自然语言的描述, 执行任意插件的命令."])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
-    # # 尝试导入依赖, 如果缺少依赖, 则给出安装建议
-    # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面
-    # if not dep_ok: return
    
    # 输入
-    i_say = prompt + txt
+    i_say = txt
    # 开始
+    llm_kwargs_function_call = copy.deepcopy(llm_kwargs)
+    llm_kwargs_function_call['llm_model'] = 'gpt-call-fn' # 修改调用函数
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=txt, 
-        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], 
-        sys_prompt=""
+        llm_kwargs=llm_kwargs_function_call, chatbot=chatbot, history=[], 
+        sys_prompt=functions
    )

    # 将代码转为动画
-    code = get_code_block(gpt_say)
-    yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
+    res = json.loads(gpt_say)['choices'][0]
+    if res['finish_reason'] == 'function_call':
+        code = json.loads(gpt_say)['choices'][0]
+        yield from eval_code(code['message']['function_call'], llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
+    else:
+        chatbot.append(["无法调用相关功能", res])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+
--- a/crazy_functions/虚空终端CodeInterpreter.py
+++ b/crazy_functions/虚空终端CodeInterpreter.py
@ -0,0 +1,213 @@
+from collections.abc import Callable, Iterable, Mapping
+from typing import Any
+from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, promote_file_to_downloadzone, clear_file_downloadzone
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from .crazy_utils import input_clipping, try_install_deps
+from multiprocessing import Process, Pipe
+import os
+
+templete = """
+```python
+import ...  # Put dependencies here, e.g. import numpy as np
+
+class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
+
+    def run(self, path):    # The name of the function must be `run`, it takes only a positional argument.
+        # rewrite the function you have just written here 
+        ...
+        return generated_file_path
+```
+"""
+
+def inspect_dependency(chatbot, history):
+    """Refresh the UI and report that dependencies are satisfied.
+
+    No actual dependency check is performed here: the generator only
+    refreshes the UI and then always returns True to its ``yield from`` caller.
+    """
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    return True
+
+def get_code_block(reply):
+    """Extract the generated code from a markdown-formatted model reply.
+
+    If the reply holds exactly one fenced block, that block is returned;
+    otherwise the first block containing ``class TerminalFunction`` is
+    returned. A leading language tag (``python`` / ``py``) is removed; the
+    rest of the block, including surrounding newlines, is returned untouched.
+
+    Raises:
+        RuntimeError: if no suitable fenced code block is found.
+    """
+    import re
+    fenced = re.findall(r"```([\s\S]*?)```", reply)  # bodies of all fenced blocks
+
+    def drop_language_tag(block):
+        # `str.strip('python')` strips a *set of characters* from both ends and
+        # can eat real code (e.g. a trailing `path`); remove only the tag itself.
+        return re.sub(r"^(?:python|py)(?=\s|$)", "", block, count=1)
+
+    if len(fenced) == 1:
+        return drop_language_tag(fenced[0])
+    for block in fenced:
+        if 'class TerminalFunction' in block:
+            return drop_language_tag(block)
+    raise RuntimeError("GPT is not generating proper code.")
+
+def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
+    """Drive a two-stage conversation that makes the model write runnable code.
+
+    Stage 1 asks for a single Python function that processes a ``file_type``
+    file according to the task in ``txt`` (sent with an empty history).
+    Stage 2 asks the model to rewrite that function to fit ``templete``,
+    sending the accumulated ``history``.
+
+    ``history`` is extended in place with both exchanges.
+
+    Returns (to the ``yield from`` caller) the tuple
+    ``(code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history)``
+    where ``code_to_return`` is the raw stage-2 reply and
+    ``installation_advance`` is currently always an empty string.
+    """
+    # Build the input
+    prompt_compose = [
+        f'Your job:\n'
+        f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
+        f"2. You should write this function to perform following task: " + txt + "\n",
+        f"3. Wrap the output python function with markdown codeblock."
+    ]
+    i_say = "".join(prompt_compose)
+    demo = []
+
+    # Step 1: ask for the function
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=i_say, 
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo, 
+        sys_prompt= r"You are a programmer."
+    )
+    history.extend([i_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+    # Step 2: ask for a rewrite that follows the executable template
+    prompt_compose = [
+        "If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
+        templete
+    ]
+    i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=inputs_show_user, 
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, 
+        sys_prompt= r"You are a programmer."
+    )
+    code_to_return = gpt_say
+    history.extend([i_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    
+    # # Step 3 (disabled)
+    # i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
+    # i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
+    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
+    #     inputs=i_say, inputs_show_user=inputs_show_user, 
+    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, 
+    #     sys_prompt= r"You are a programmer."
+    # )
+    # # # Step 3, alternative (disabled)
+    # i_say = "Show me how to use `pip` to install packages to run the code above. "
+    # i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
+    # installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
+    #     inputs=i_say, inputs_show_user=i_say, 
+    #     llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, 
+    #     sys_prompt= r"You are a programmer."
+    # )
+    installation_advance = ""
+    
+    return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
+
+def make_module(code):
+    """Persist generated code as a module under ``gpt_log/`` and locate its class.
+
+    Args:
+        code: Python source text that defines a class.
+
+    Returns:
+        A ``"gpt_log.<module>-><ClassName>"`` locator string (module path and
+        class name joined by ``->``).
+
+    Raises:
+        RuntimeError: if no class definition is found in ``code``.
+    """
+    import re
+    # Accept both `class Foo(Base):` and `class Foo:`; the previous pattern
+    # required a "(" and crashed with AttributeError on the bare form.
+    found = re.search(r'class\s+(\w+)\s*[(:]', code)
+    if found is None:
+        # Validate before writing so a useless module file is not left behind.
+        raise RuntimeError("GPT is not generating proper code.")
+    class_name = found.group(1)
+
+    module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
+    os.makedirs('gpt_log', exist_ok=True)   # open() below fails if the folder is missing
+    with open(f'gpt_log/{module_file}.py', 'w', encoding='utf8') as f:
+        f.write(code)
+    return f"gpt_log.{module_file}->{class_name}"
+
+def init_module_instance(module):
+    """Import the module named in a ``"<module>-><Class>"`` locator and return a new instance of the class."""
+    from importlib import import_module
+    module_name, class_name = module.split('->')
+    target_cls = getattr(import_module(module_name), class_name)
+    return target_cls()
+
+def for_immediate_show_off_when_possible(file_type, fp, chatbot):
+    """Append an inline image preview message to ``chatbot`` when ``file_type`` is png/jpg.
+
+    Returns the same ``chatbot`` object; it is left untouched for any other file type.
+    """
+    if file_type not in ('png', 'jpg'):
+        return chatbot
+    image_path = os.path.abspath(fp)
+    location_html = f'本地文件地址: <br/>`{image_path}`<br/>'
+    preview_html = f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
+    chatbot.append(['这是一张图片, 展示如下:', location_html + preview_html])
+    return chatbot
+
+def subprocess_worker(instance, file_path, return_dict):
+    """Run ``instance.run(file_path)`` and store the returned value in ``return_dict['result']``."""
+    outcome = instance.run(file_path)
+    return_dict['result'] = outcome
+
+@CatchException
+def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Ask the model to write code for the request in ``txt``, then run that code on the most recently uploaded file.
+
+    txt             text typed by the user in the input box (the task description)
+    llm_kwargs      model parameters such as temperature and top_p, passed through unchanged
+    plugin_kwargs   plugin parameters; ``recently_uploaded_files`` is read from here
+    chatbot         handle of the chat display, used to show messages to the user
+    history         chat history (reset to an empty list at the start of this plugin)
+    system_prompt   silent system prompt for the model (not used in this function)
+    web_port        port the application is running on (not used in this function)
+    """
+    # Clear the history to avoid overflowing the model input
+    history = []; clear_file_downloadzone(chatbot)
+
+    # Basic information: what the plugin does, and its author
+    chatbot.append([
+        "函数插件功能？",
+        "CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 作者: binary-husky, 插件初始化中 ..."
+    ])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+    # Check dependencies; stop if they are not satisfied
+    dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history)
+    if not dep_ok: return
+
+    # Locate the file to work on
+    if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
+    recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
+    if not recently_uploaded_files:
+        # Without this guard, indexing None (or an empty list) raised an opaque error.
+        chatbot.append([
+            "未找到文件",
+            "请先上传一个待处理的文件，然后再次点击该插件。"
+        ])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    file_path = recently_uploaded_files[-1]
+    file_type = file_path.split('.')[-1]
+
+    # Sanity check: the input box still holds an upload path instead of a task description
+    if 'private_upload' in txt:
+        chatbot.append([
+            "...",
+            f"请在输入框内填写需求，然后再次点击该插件（文件路径 {file_path} 已经被记忆）"
+        ])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    # Generate the code, retrying at most 5 times
+    for j in range(5):
+        try:
+            code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
+                yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
+            code = get_code_block(code)
+            res = make_module(code)
+            instance = init_module_instance(res)
+            break
+        except Exception:
+            # j is zero-based; show a one-based attempt number to the user
+            chatbot.append([f"第{j+1}次代码生成尝试，失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
+            yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    else:
+        # Every attempt failed, so `instance` was never bound; stop here instead
+        # of falling through to a NameError in the execution stage.
+        chatbot.append(["代码生成失败", "已重试5次，仍未能生成可执行的代码，请修改需求描述后重试。"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    # Code generation finished; execute it in a separate process
+    try:
+        import multiprocessing
+        # The context manager shuts the manager's server process down on every path
+        with multiprocessing.Manager() as manager:
+            return_dict = manager.dict()
+
+            p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
+            # only has 10 seconds to run
+            p.start(); p.join(timeout=10)
+            timed_out = p.is_alive()
+            if timed_out: p.terminate(); p.join()
+            p.close()
+            if timed_out:
+                raise TimeoutError("generated code did not finish within 10 seconds")
+            if 'result' not in return_dict:
+                raise RuntimeError("generated code exited without producing a result")
+            res = return_dict['result']
+    except Exception:
+        chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    # Success: present the result
+    res = str(res)
+    if os.path.exists(res):
+        chatbot.append(["执行成功了，结果是一个有效文件", "结果：" + res])
+        new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
+        # Decide on the preview from the *result* file's extension, not the input file's
+        result_type = os.path.splitext(res)[1].lstrip('.').lower()
+        chatbot = for_immediate_show_off_when_possible(result_type, new_file_path, chatbot)
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    else:
+        chatbot.append(["执行成功了，结果是一个字符串", "结果：" + res])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+"""
+测试：
+    裁剪图像，保留下半部分
+    交换图像的蓝色通道和红色通道
+    将图像转为灰度图像
+    将csv文件转excel表格
+"""
--- a/crazy_functions/解析项目源代码炫酷版.py
+++ b/crazy_functions/解析项目源代码炫酷版.py
@ -0,0 +1,137 @@
+from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file
+from toolbox import objdump, objload
+from .crazy_utils import input_clipping
+
+class ThreeJSPlot():
+    """Holds a list of files and a ``vhmap`` bridge object that is created on demand."""
+    def __init__(self) -> None:
+        self.files = None
+        # Defined up front so that reading it before `launch_render_interface`
+        # yields None instead of raising AttributeError.
+        self.visual_bridge = None
+
+    def read_files(self, files):
+        """Remember ``files`` on the instance."""
+        self.files = files
+
+    def launch_render_interface(self):
+        """Create the ``mcom`` bridge object (``vhmap`` is imported only when this is called)."""
+        from vhmap.mcom import mcom
+        self.visual_bridge = mcom()
+
+
+
+def 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
+    """Summarise every file in ``file_manifest`` with one model request per file.
+
+    Generator, meant to be driven with ``yield from``. Each file is read as
+    UTF-8 (undecodable bytes replaced), a summary request is built for it, and
+    all requests are dispatched through the multi-threaded request helper.
+    The collected replies are written out with ``write_results_to_file`` and
+    shown in ``chatbot``. The second (aggregation) step is currently empty.
+
+    Raises:
+        AssertionError: if ``file_manifest`` holds more than 512 files.
+    """
+    import os, copy
+    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+    msg = '正常'
+    summary_batch_isolation = True
+    inputs_array = []
+    inputs_show_user_array = []
+    history_array = []
+    sys_prompt_array = []
+    report_part_1 = []
+
+    assert len(file_manifest) <= 512, "源文件太多（超过512个）, 请缩减输入文件的数量。或者，您也可以选择删除此行警告，并修改代码拆分file_manifest列表，从而实现分批次处理。"
+    ############################## <Step 1: analyse the files one by one, multi-threaded> ##################################
+    for index, fp in enumerate(file_manifest):
+        # Read the file
+        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
+            file_content = f.read()
+        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
+        # Queue the request content
+        inputs_array.append(i_say)
+        inputs_show_user_array.append(i_say_show_user)
+        history_array.append([])
+        sys_prompt_array.append("你是一个程序架构分析师，正在分析一个源代码项目。你的回答必须简单明了。")
+
+    def callback_when_intel_update(gpt_reply_array):
+        # Passed to the request helper as `callback_fn`; dumps the replies
+        # together with the file list via `objdump`.
+        objdump((gpt_reply_array, file_manifest))
+        return
+
+
+    # All files are read; issue one request per source file for analysis
+    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array = inputs_array,
+        inputs_show_user_array = inputs_show_user_array,
+        history_array = history_array,
+        sys_prompt_array = sys_prompt_array,
+        llm_kwargs = llm_kwargs,
+        chatbot = chatbot,
+        show_user_at_complete = True,
+        callback_fn=callback_when_intel_update,
+    )
+
+    # Per-file analysis is done; write the results to a file before the project-level summary
+    report_part_1 = copy.deepcopy(gpt_response_collection)
+    history_to_return = report_part_1
+    res = write_results_to_file(report_part_1)
+    chatbot.append(("完成？", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
+    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh the UI
+    ############################## <Step 2: aggregate, single-threaded, grouped + iterative (currently empty)> ##################################
+
+
+    ############################## <END> ##################################
+    history_to_return.extend([])
+    res = write_results_to_file(history_to_return)
+    chatbot.append(("完成了吗？", res))
+    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh the UI
+
+
+@CatchException
+def 解析一个Python项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Plugin entry: gather every ``*.py`` file under the folder ``txt`` (recursively) and analyse them."""
+    import glob, os
+    history = []    # start from an empty history to avoid overflowing the input
+
+    # Guard: the input must be an existing path
+    if not os.path.exists(txt):
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    project_folder = txt
+
+    # Guard: there must be at least one Python file to analyse
+    file_manifest = list(glob.glob(f'{project_folder}/**/*.py', recursive=True))
+    if not file_manifest:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    yield from 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
+
+@CatchException
+def 解析任意code项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry: analyse the files under ``txt`` selected by the pattern in ``plugin_kwargs["advanced_arg"]``.
+
+    Pattern syntax (taken from the parsing below):
+        ``*.c, *.py, config.toml``   glob patterns to include (comma separated)
+        ``^*.c``                      file suffix to exclude
+        ``^README.md``                file name to exclude
+    An empty or missing pattern includes everything. Archive suffixes
+    (zip, rar, 7z, tar, gz) are always excluded.
+    """
+    # A missing advanced argument is treated like an empty one (match everything)
+    txt_pattern = plugin_kwargs.get("advanced_arg") or ""
+    txt_pattern = txt_pattern.replace("，", ",")
+    # Patterns to include (e.g. *.c, *.cpp, *.py, config.toml)
+    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
+    if not pattern_include: pattern_include = ["*"] # no input means match everything
+    # File suffixes to exclude (e.g. ^*.c, ^*.cpp, ^*.py)
+    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
+    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # never analyse archives
+    # File names to exclude (e.g. ^README.md)
+    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
+    # Build the exclusion regular expression (raw strings: same value, no invalid-escape warning)
+    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
+    pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
+
+    history.clear()
+    import glob, os, re
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    # If an archive was uploaded, look for its extraction folder so the archive itself is not analysed
+    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
+    if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
+        extract_folder_path = maybe_dir[0]
+    else:
+        extract_folder_path = project_folder
+    # Collect uploaded plain files and extracted files that match the include patterns
+    file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
+                      os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    # `解析源代码新` is neither defined nor imported in this module (NameError);
+    # the analyser defined in this file is `解析源代码炫酷版`.
+    yield from 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
--- a/main.py
+++ b/main.py
@ -160,7 +160,7 @@ def main():
            click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
            cancel_handles.append(click_handle)
        # 文件上传区，接收文件后与chatbot的互动
-        file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes], [chatbot, txt, txt2])
+        file_upload.upload(on_file_uploaded, [cookies, file_upload, chatbot, txt, txt2, checkboxes], [cookies, chatbot, txt, txt2])
        # 函数插件-固定按钮区
        for k in crazy_fns:
            if not crazy_fns[k].get("AsButton", True): continue
--- a/request_llm/edge_gpt_free.py
+++ b/request_llm/edge_gpt_free.py
@ -519,7 +519,11 @@ class _ChatHub:
        resp_txt_no_link = ""
        while not final:
            msg = await self.wss.receive()
-            objects = msg.data.split(DELIMITER)
+            try:
+                objects = msg.data.split(DELIMITER)
+            except :
+                continue
+            
            for obj in objects:
                if obj is None or not obj:
                    continue
--- a/requirements.txt
+++ b/requirements.txt
@ -18,4 +18,4 @@ openai
 numpy
 arxiv
 rich
-pypdf2
+pypdf2==2.12.1
--- a/toolbox.py
+++ b/toolbox.py
@ -60,6 +60,9 @@ def ArgsGeneralWrapper(f):
        plugin_kwargs = {
            "advanced_arg": plugin_advanced_arg,
        }
+        if "recently_uploaded_files" in cookies:
+            plugin_kwargs.update({"recently_uploaded_files": cookies["recently_uploaded_files"]})
+
        chatbot_with_cookie = ChatBotWithCookies(cookies)
        chatbot_with_cookie.write_list(chatbot)
        if cookies.get('lock_plugin', None) is None:
@ -462,6 +465,10 @@ def find_recent_files(directory):

    return recent_files

+def clear_file_downloadzone(chatbot):
+    """Reset the ``file_to_promote`` cookie to an empty list; do nothing for a falsy ``chatbot``."""
+    if not chatbot:
+        return
+    chatbot._cookies.update(file_to_promote=[])
+    
 def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
    # 将文件复制一份到下载区
    import shutil
@ -476,13 +483,14 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
        if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote']
        else: current = []
        chatbot._cookies.update({'file_to_promote': [new_path] + current})
+    return new_path

-def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
+def on_file_uploaded(cookies, files, chatbot, txt, txt2, checkboxes):
    """
    当文件被上传时的回调函数
    """
    if len(files) == 0:
-        return chatbot, txt
+        return cookies, chatbot, txt
    import shutil
    import os
    import time
@ -512,7 +520,8 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
                    f'[Local Message] 收到以下文件: \n\n{moved_files_str}' +
                    f'\n\n调用路径参数已自动修正到: \n\n{txt}' +
                    f'\n\n现在您点击任意“红颜色”标识的函数插件时，以上文件将被作为输入参数'+err_msg])
-    return chatbot, txt, txt2
+    cookies.update({"recently_uploaded_files": moved_files})
+    return cookies, chatbot, txt, txt2


 def on_report_generated(cookies, files, chatbot):
@ -538,7 +547,11 @@ def load_chat_cookies():
    return {'api_key': API_KEY, 'llm_model': LLM_MODEL}

 def is_openai_api_key(key):
-    API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
+    CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
+    if len(CUSTOM_API_KEY_PATTERN) != 0:
+        API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
+    else:
+        API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
    return bool(API_MATCH_ORIGINAL)

 def is_azure_api_key(key):
@ -594,7 +607,7 @@ def select_api_key(keys, llm_model):
            if is_azure_api_key(k): avail_key_list.append(k)

    if len(avail_key_list) == 0:
-        raise RuntimeError(f"您提供的api-key不满足要求，不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源（右下角更换模型菜单中可切换openai,azure和api2d请求源）")
+        raise RuntimeError(f"您提供的api-key不满足要求，不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源（右下角更换模型菜单中可切换openai,azure,claude,api2d等请求源）。")

    api_key = random.choice(avail_key_list) # 随机负载均衡
    return api_key
@ -670,12 +683,12 @@ def read_single_conf_with_lru_cache(arg):

    # 在读取API_KEY时，检查一下是不是忘了改config
    if arg == 'API_KEY':
-        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和API2D的api-key。也支持同时填写多个api-key，如API_KEY=\"openai-key1,openai-key2,api2d-key3\"")
+        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key，如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
        print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s)，也可以在问题输入区输入临时的api-key(s)，然后回车键提交后即可生效。")
        if is_any_api_key(r):
            print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
        else:
-            print亮红( "[API_KEY] 正确的 API_KEY 是'sk'开头的51位密钥（OpenAI），或者 'fk'开头的41位密钥，请在config文件中修改API密钥之后再运行。")
+            print亮红( "[API_KEY] 您的 API_KEY 不满足任何一种已知的密钥格式，请在config文件中修改API密钥之后再运行。")
    if arg == 'proxies':
        if r is None:
            print亮红('[PROXY] 网络代理状态：未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议：检查USE_PROXY选项是否修改。')
@ -685,6 +698,7 @@ def read_single_conf_with_lru_cache(arg):
    return r


+@lru_cache(maxsize=128)
 def get_conf(*args):
    # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
    res = []
Author	SHA1	Message	Date
qingxu fu	fb22f716ff	3d interp	2023-07-25 10:09:50 +08:00
qingxu fu	a8093b9dd8	多线程约束越大执行时间	2023-07-22 03:05:04 +08:00
qingxu fu	4f55dfdc0e	easy	2023-07-22 02:25:30 +08:00
qingxu fu	505b10965f	CodeInterpreter	2023-07-22 01:48:24 +08:00
binary-husky	a393edfaa4	ALLOW CUSTOM API KEY PATTERN	2023-07-21 22:49:07 +08:00
binary-husky	dd7a01cda5	Merge pull request #976 from fenglui/master fix msg.data.split(DELIMITER) exception when msg.data is int	2023-07-21 17:02:29 +08:00
fenglui	00a3b91f95	fix msg.data.split(DELIMITER) exception when msg.data is int	2023-07-21 03:51:33 +08:00
qingxu fu	61ba544282	add latex test samples	2023-07-20 19:49:23 +08:00
qingxu fu	b5b8c123e4	latex plugin stability improvement	2023-07-20 19:39:22 +08:00
qingxu fu	d9ceba959f	expand range after failure	2023-07-20 18:39:02 +08:00
qingxu fu	6b5b040701	remove pdf merge	2023-07-20 18:29:06 +08:00
qingxu fu	4f4c09a5f3	增强Latex修复能力	2023-07-20 18:08:22 +08:00