From f3205994ea670f299b2a4a5a8135f5501f8319ff Mon Sep 17 00:00:00 2001
From: XiaojianTang <277172719@qq.com>
Date: Fri, 26 May 2023 23:22:12 +0800
Subject: [PATCH 01/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0azure=20openai=20api?=
 =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                        |   7 +
 request_llm/bridge_all.py        |  13 ++
 request_llm/bridge_azure_test.py | 241 +++++++++++++++++++++++++++++++
 3 files changed, 261 insertions(+)
 create mode 100644 request_llm/bridge_azure_test.py

diff --git a/config.py b/config.py
index 14b089e..afaf6a8 100644
--- a/config.py
+++ b/config.py
@@ -1,6 +1,13 @@
 # [step 1]>> 例如： API_KEY = "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" （此key无效）
 API_KEY = "sk-此处填API密钥"    # 可同时填写多个API-KEY，用英文逗号分割，例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2"
 
+#增加关于AZURE的配置信息， 可以在AZURE网页中找到
+AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "填入api版本"
+AZURE_ENGINE = "填入ENGINE"
+
+
 # [step 2]>> 改为True应用代理，如果直接在海外服务器部署，此处不修改
 USE_PROXY = False
 if USE_PROXY:
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index b6efe21..25c5a8b 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -16,6 +16,9 @@ from toolbox import get_conf, trimmed_format_exc
 from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
 from .bridge_chatgpt import predict as chatgpt_ui
 
+from .bridge_azure_test import predict_no_ui_long_connection as azure_noui
+from .bridge_azure_test import predict as azure_ui
+
 from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
 from .bridge_chatglm import predict as chatglm_ui
 
@@ -93,6 +96,16 @@ model_info = {
         "token_cnt": get_token_num_gpt4,
     },
 
+    # azure openai
+    "azure-gpt35":{
+        "fn_with_ui": azure_ui,
+        "fn_without_ui": azure_noui,
+        "endpoint": get_conf("AZURE_ENDPOINT"),
+        "max_token": 4096,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+
     # api_2d
     "api2d-gpt-3.5-turbo": {
         "fn_with_ui": chatgpt_ui,
diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py
new file mode 100644
index 0000000..edc68f7
--- /dev/null
+++ b/request_llm/bridge_azure_test.py
@@ -0,0 +1,241 @@
+"""
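+    This file mainly contains two functions
+
+    Function without multi-threading capability:
+    1. predict: used for normal conversation, fully interactive, cannot be called from multiple threads
+
+    Function that can be called from multiple threads:
+    2. predict_no_ui_long_connection: called by advanced experimental modules; nothing is shown in the UI in real time.
+       It reads the reply as a stream, because the connection tended to drop on long documents otherwise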
+"""
+
+import logging
+import traceback
+import importlib
+import openai
+import time
+
+
+# 读取config.py文件中关于AZURE OPENAI API的信息
+from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc
+TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \
+    get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY")
+
+
+def get_full_error(chunk, stream_response):
+    """
+        Drain stream_response, appending every remaining piece to chunk, and return the result.
+    """
+    while True:
+        try:
+            chunk += next(stream_response)
+        except Exception:  # StopIteration ends the stream; any other error ends this best-effort read
+            break
+    return chunk
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
+    """
+    Send the query to the Azure OpenAI API and stream the reply into the chat UI.
+
+    inputs: the current user query.
+    llm_kwargs: sampling options (model name, temperature, top_p) forwarded to generate_azure_payload.
+    plugin_kwargs: not used by this function.
+    chatbot: conversation list displayed in the WebUI; it is modified and yielded through update_ui.
+    history: flat list of earlier turns; the new question and its reply are appended to it.
+    system_prompt: the system prompt.
+    stream: only the streaming path is implemented; with a false value no reply is read.
+    additional_fn: key of the clicked core-function button (see core_functional), or None.
+
+    Raises TimeoutError when the request keeps failing after MAX_RETRY retries.
+    """
+    print(llm_kwargs["llm_model"])
+
+    # A core-function button pre-processes the input and wraps it in its prefix and suffix.
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # hot-reload the prompts
+        core_functional = core_functional.get_core_functions()
+        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # apply the pre-processing function, if any
+        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
+
+    raw_input = inputs
+    logging.info(f'[raw_input] {raw_input}')
+    # Show the question right away, with an empty reply.
+    chatbot.append((inputs, ""))
+    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI
+
+    # Build the request from the history as it was before this turn.
+    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream)
+
+    # Reserve the history slots that the streamed reply fills in.
+    history.append(inputs)
+    history.append("")
+
+    # Module-level openai settings select the Azure backend.
+    openai.api_type = "azure"
+    openai.api_version = AZURE_API_VERSION
+    openai.api_base = AZURE_ENDPOINT
+    openai.api_key = AZURE_API_KEY
+
+    # Retry the request up to MAX_RETRY times before giving up.
+    retry = 0
+    while True:
+        try:
+            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload)
+            break
+        except Exception:
+            retry += 1
+            chatbot[-1] = ((chatbot[-1][0], "获取response失败，重试中。。。"))
+            retry_msg = f"，正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
+            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
+            if retry > MAX_RETRY: raise TimeoutError
+
+    # Text received so far.
+    gpt_replying_buffer = ""
+    # Without streaming there is nothing to read chunk by chunk.
+    if not stream: return
+
+    try:
+        for chunk in response:
+            # Every streamed chunk is a mapping; the incremental text is in
+            # choices[0]["delta"]["content"], which may be absent (the first frame carries none).
+            choices = chunk.get("choices") or []
+            if not choices: continue
+            finish_reason = choices[0].get("finish_reason")
+            delta = choices[0].get("delta") or {}
+            gpt_replying_buffer += delta.get("content") or ""
+
+            history[-1] = gpt_replying_buffer
+            chatbot[-1] = (history[-2], history[-1])
+            status_text = f"finish_reason: {finish_reason}"
+            yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI
+
+            # Any finish_reason ("stop", "length", ...) marks the end of the reply.
+            if finish_reason is not None:
+                logging.info(f'[response] {gpt_replying_buffer}')
+                break
+    except Exception:
+        # Report the failure in the chat window.
+        traceback.print_exc()
+        tb_str = '```\n' + trimmed_format_exc() + '```'
+        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str}")
+        yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误") # refresh the UI
+        return
+
+
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
+    """
+    Send a request to the Azure OpenAI API and return the whole reply as one string.
+    Nothing is shown in the UI; the reply is read as a stream.
+
+    inputs: the current user query.
+    llm_kwargs: sampling options (model name, temperature, top_p) forwarded to generate_azure_payload.
+    history: flat list of earlier turns; it is only read here.
+    sys_prompt: the system prompt.
+    observe_window: optional list used to pass progress across threads.
+        observe_window[0] receives the text streamed so far; observe_window[1] is the watchdog
+        timestamp, and the call is aborted once it is more than watch_dog_patience seconds old.
+    console_slience: when true, the streamed text is not echoed to stdout.
+
+    Raises ConnectionAbortedError when the request is rejected with a "reduce the length"
+    message or the reply ends with finish_reason "length", TimeoutError when the request
+    keeps failing after MAX_RETRY retries, and RuntimeError when the watchdog expires.
+    """
+    watch_dog_patience = 5 # watchdog patience in seconds
+    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
+
+    # Module-level openai settings select the Azure backend.
+    openai.api_type = "azure"
+    openai.api_version = AZURE_API_VERSION
+    openai.api_base = AZURE_ENDPOINT
+    openai.api_key = AZURE_API_KEY
+
+    # Retry the request up to MAX_RETRY times before giving up.
+    retry = 0
+    while True:
+        try:
+            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload)
+            break
+        except Exception as e:
+            # A too-long prompt cannot succeed on retry; keep the dedicated exception type for it.
+            if "reduce the length" in str(e):
+                raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + str(e)) from e
+            retry += 1
+            traceback.print_exc()
+            if retry > MAX_RETRY: raise TimeoutError
+            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
+
+    result = ''
+    finish_reason = None
+    for chunk in response:
+        # Every streamed chunk is a mapping; the incremental text is in
+        # choices[0]["delta"]["content"], which may be absent (e.g. a role-only first frame).
+        choices = chunk.get("choices") or []
+        if not choices: continue
+        finish_reason = choices[0].get("finish_reason")
+        content = (choices[0].get("delta") or {}).get("content")
+        if content:
+            result += content
+            if not console_slience: print(content, end='')
+            if observe_window is not None:
+                # observation window: publish the data received so far
+                if len(observe_window) >= 1: observe_window[0] += content
+                # watchdog: abort when it has not been fed within the deadline
+                if len(observe_window) >= 2:
+                    if (time.time()-observe_window[1]) > watch_dog_patience:
+                        raise RuntimeError("用户取消了程序。")
+        # Any finish_reason ("stop", "length", ...) marks the end of the reply.
+        if finish_reason is not None: break
+
+    # "length" means the reply was cut off by the token limit.
+    if finish_reason == 'length':
+        raise ConnectionAbortedError("正常结束，但显示Token不足，导致输出不完整，请削减单次输入的文本量。")
+    return result
+
+
+def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream):
+    """
+    Assemble the keyword arguments of an Azure OpenAI chat-completion request from the
+    system prompt, the conversation history and the current input.
+
+    Returns the payload dict; a one-line summary of the request is printed as a side effect.
+    """
+    # history is read as consecutive (question, answer) pairs; an unpaired last entry is ignored.
+    conversation_cnt = len(history) // 2
+
+    messages = [{"role": "system", "content": system_prompt}]
+    for pair_idx in range(conversation_cnt):
+        question = history[2*pair_idx]
+        answer = history[2*pair_idx + 1]
+        if question != "":
+            # Pairs whose answer is empty are left out of the request.
+            if answer != "":
+                messages.append({"role": "user", "content": question})
+                messages.append({"role": "assistant", "content": answer})
+        else:
+            # An empty question: the answer replaces the content of the latest message.
+            messages[-1]['content'] = answer
+
+    messages.append({"role": "user", "content": inputs})
+
+    model_name = llm_kwargs['llm_model']
+    payload = {
+        "model": model_name,
+        "messages": messages,
+        "temperature": llm_kwargs['temperature'],  # 1.0,
+        "top_p": llm_kwargs['top_p'],  # 1.0,
+        "n": 1,
+        "stream": stream,
+        "presence_penalty": 0,
+        "frequency_penalty": 0,
+        "engine": AZURE_ENGINE
+    }
+
+    # Print a short summary; the fallback message covers input that cannot be printed.
+    try:
+        print(f" {model_name} : {conversation_cnt} : {inputs[:100]} ..........")
+    except:
+        print('输入中可能存在乱码。')
+    return payload
+
+

From c65def90f3bcc152113580488a176e2f232fa140 Mon Sep 17 00:00:00 2001
From: MengDanzz <95761983+MengDanzz@users.noreply.github.com>
Date: Tue, 6 Jun 2023 14:36:30 +0800
Subject: [PATCH 02/78] =?UTF-8?q?=E5=B0=86Dockerfile=20COPY=E5=88=86?=
 =?UTF-8?q?=E6=88=90=E4=B8=A4=E6=AE=B5=EF=BC=8C=E7=BC=93=E5=AD=98=E4=BE=9D?=
 =?UTF-8?q?=E8=B5=96=E5=BA=93=EF=BC=8C=E9=87=8D=E6=96=B0=E6=9E=84=E5=BB=BA?=
 =?UTF-8?q?=E4=B8=8D=E9=9C=80=E8=A6=81=E9=87=8D=E6=96=B0=E5=AE=89=E8=A3=85?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 19d988f..aa4eee8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -10,12 +10,14 @@ RUN echo '[global]' > /etc/pip.conf && \
 
 WORKDIR /gpt
 
-# 装载项目文件
-COPY . .
+
+
 
 # 安装依赖
+COPY requirements.txt ./
 RUN pip3 install -r requirements.txt
-
+# 装载项目文件
+COPY . .
 
 # 可选步骤，用于预热模块
 RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'

From 40da1b0afefd42bb27255e6980ac9b3fd43d7654 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Tue, 6 Jun 2023 18:44:00 +0800
Subject: [PATCH 03/78] =?UTF-8?q?=E5=B0=86Latex=E5=88=86=E8=A7=A3=E7=A8=8B?=
 =?UTF-8?q?=E5=BA=8F=E6=94=BE=E5=88=B0=E5=AD=90=E8=BF=9B=E7=A8=8B=E6=89=A7?=
 =?UTF-8?q?=E8=A1=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/Latex输出PDF结果.py     | 15 ++++-
 crazy_functions/crazy_functions_test.py |  8 +--
 crazy_functions/latex_utils.py          | 84 +++++++++++++++++--------
 docs/Dockerfile+NoLocal+Latex           | 21 ++-----
 4 files changed, 80 insertions(+), 48 deletions(-)

diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index ecba82b..855cc1c 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -82,7 +82,14 @@ def arxiv_download(chatbot, history, txt):
             promote_file_to_downloadzone(target_file)
             return target_file
         return False
-    
+    def is_float(s):
+        try:
+            float(s)
+            return True
+        except ValueError:
+            return False
+    if ('.' in txt) and ('/' not in txt) and is_float(txt):
+        txt = 'https://arxiv.org/abs/' + txt
     if not txt.startswith('https://arxiv.org'): 
         return txt, None
     
@@ -198,7 +205,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- information about this plugin ------------->
     chatbot.append([
         "函数插件功能？",
-        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。仅在Windows系统进行了测试，其他操作系统表现未知。"])
+        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
 
@@ -221,6 +228,8 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
+    
+
     if os.path.exists(txt):
         project_folder = txt
     else:
@@ -228,6 +237,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
+    
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
     if len(file_manifest) == 0:
         report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
@@ -261,5 +271,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
 
+
     # <-------------- we are done ------------->
     return success
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index d19d653..e743878 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -182,13 +182,13 @@ def test_Langchain知识库读取():
 def test_Latex():
     from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF
 
-    txt = r"https://arxiv.org/abs/1706.03762"
+    # txt = r"https://arxiv.org/abs/1706.03762"
     # txt = r"https://arxiv.org/abs/1902.03185"
     # txt = r"https://arxiv.org/abs/2305.18290"
     # txt = r"https://arxiv.org/abs/2305.17608"
-    # txt = r"https://arxiv.org/abs/2211.16068"   #  ACE
-    # txt = r"C:\Users\fuqingxu\arxiv_cache\2211.16068\workfolder"   #  ACE
-    
+    # txt = r"https://arxiv.org/abs/2211.16068"                     #  ACE
+    # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
+    txt = r"https://arxiv.org/abs/2002.09253"
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
         cli_printer.print(cb)   #  print(cb)
 
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index b490b5c..15dfebc 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -61,8 +61,8 @@ class LinkedListNode():
         self.string = string
         self.preserve = preserve
         self.next = None
-        self.begin_line = 0
-        self.begin_char = 0
+        # self.begin_line = 0
+        # self.begin_char = 0
 
 def convert_to_linklist(text, mask):
     root = LinkedListNode("", preserve=True)
@@ -97,11 +97,22 @@ def 寻找Latex主文件(file_manifest, mode):
         else:
             continue
     raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
-
+def rm_comments(main_file):
+    """Return main_file with LaTeX comments removed; a percent sign escaped by a backslash is kept."""
+    kept_lines = []
+    for line in main_file.splitlines():
+        # Drop whole-line comments entirely, whatever whitespace (spaces or tabs) precedes the "%";
+        # blanking them instead would leave a whitespace-only line, i.e. a paragraph break in LaTeX.
+        if not line.lstrip().startswith("%"):
+            kept_lines.append(line)
+    main_file = '\n'.join(kept_lines)
+    main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # remove trailing (half-line) comments
+    return main_file
 def merge_tex_files_(project_foler, main_file, mode):
     """
     Merge Tex project recrusively
     """
+    main_file = rm_comments(main_file)
     for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
         f = s.group(1)
         fp = os.path.join(project_foler, f)
@@ -124,6 +135,8 @@ def merge_tex_files(project_foler, main_file, mode):
     P.S. 顺便把Latex的注释去除
     """
     main_file = merge_tex_files_(project_foler, main_file, mode)
+    main_file = rm_comments(main_file)
+
     if mode == 'translate_zh':
         pattern = re.compile(r'\\documentclass.*\n')
         match = pattern.search(main_file)
@@ -136,16 +149,6 @@ def merge_tex_files(project_foler, main_file, mode):
         if platform.system() != 'Windows':
             main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
             main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
-
-    new_file_remove_comment_lines = []
-    for l in main_file.splitlines():
-        # 删除整行的空注释
-        if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")):
-            pass
-        else:
-            new_file_remove_comment_lines.append(l)
-    main_file = '\n'.join(new_file_remove_comment_lines)
-    main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # 使用正则表达式查找半行注释, 并替换为空字符串
     return main_file
 
 
@@ -197,10 +200,7 @@ class LatexPaperSplit():
     be proccessed by GPT.
     """
     def __init__(self) -> None:
-        """
-        root是链表的根节点
-        """
-        self.root = None
+        self.nodes = None
         self.msg = "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
             "版权归原文作者所有。翻译内容可靠性无任何保障，请仔细鉴别并以原文为准。" + \
             "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
@@ -212,16 +212,13 @@ class LatexPaperSplit():
         Merge the result after the GPT process completed
         """
         result_string = ""
-        node = self.root
         p = 0
-        while True:
+        for node in self.nodes:
             if node.preserve:
                 result_string += node.string
             else:
                 result_string += fix_content(arr[p], node.string)
                 p += 1
-            node = node.next
-            if node is None: break
         if mode == 'translate_zh':
             pattern = re.compile(r'\\begin\{abstract\}.*\n')
             match = pattern.search(result_string)
@@ -229,7 +226,27 @@ class LatexPaperSplit():
             result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
         return result_string
 
-    def split(self, txt, project_folder):
+    def split(self, txt, project_folder): 
+        """
+        Run self.split_subprocess in a child process and wait for it to finish.
+        The child fills a manager dict; its 'nodes' entry is stored in self.nodes and
+        its 'segment_parts_for_gpt' entry in self.sp, which is also returned.
+        P.S. multiprocessing is used to avoid a timeout error
+        """
+        import multiprocessing
+        manager = multiprocessing.Manager()
+        return_dict = manager.dict()
+        p = multiprocessing.Process(
+            target=lambda lps, txt, project_folder, return_dict: 
+            lps.split_subprocess(txt, project_folder, return_dict), 
+            args=(self, txt, project_folder, return_dict))
+        p.start()
+        p.join()
+        self.nodes = return_dict['nodes']  # NOTE(review): KeyError if the child exited without filling the dict
+        self.sp = return_dict['segment_parts_for_gpt']
+        return self.sp
+
+    def split_subprocess(self, txt, project_folder, return_dict):
         """
         break down latex file to a linked list,
         each node use a preserve flag to indicate whether it should
@@ -318,12 +335,20 @@ class LatexPaperSplit():
             node = node.next
             if node is None: break
 
+        # 屏蔽空行和太短的句子
         node = root
         while True:
             if len(node.string.strip('\n').strip(''))==0: node.preserve = True
             if len(node.string.strip('\n').strip(''))<42: node.preserve = True
             node = node.next
             if node is None: break
+        node = root
+        while True:
+            if node.next and node.preserve and node.next.preserve:
+                node.string += node.next.string
+                node.next = node.next.next
+            node = node.next
+            if node is None: break
 
         # 将前后断行符脱离
         node = root
@@ -345,8 +370,10 @@ class LatexPaperSplit():
 
         with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
             segment_parts_for_gpt = []
+            nodes = []
             node = root
             while True:
+                nodes.append(node)
                 show_html = node.string.replace('\n','<br/>')
                 if not node.preserve:
                     segment_parts_for_gpt.append(node.string)
@@ -355,9 +382,11 @@ class LatexPaperSplit():
                     f.write(f'<p style="color:red;">{show_html}</p>')
                 node = node.next
                 if node is None: break
-        self.root = root
-        self.sp = segment_parts_for_gpt
-        return self.sp
+
+        for n in nodes: n.next = None   # break
+        return_dict['nodes'] = nodes
+        return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
+        return return_dict
 
 class LatexPaperFileGroup():
     """
@@ -439,7 +468,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
 
     #  <-------- 精细切分latex文件 ----------> 
     lps = LatexPaperSplit()
-    res = lps.split(merged_content, project_folder)
+    res = lps.split(merged_content, project_folder) # 消耗时间的函数
 
     #  <-------- 拆分过长的latex片段 ----------> 
     pfg = LatexPaperFileGroup()
@@ -515,7 +544,8 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
             f.writelines(file_lines)
         return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
     except:
-        return False, 0, [0]
+        print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
+        return False, -1, [-1]
     
 
 def compile_latex_with_timeout(command, timeout=60):
diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex
index 428dbc0..0f9ac8a 100644
--- a/docs/Dockerfile+NoLocal+Latex
+++ b/docs/Dockerfile+NoLocal+Latex
@@ -8,26 +8,17 @@ FROM fuqingxu/python311_texlive_ctex:latest
 # 指定路径
 WORKDIR /gpt
 
+ARG useProxyNetwork=''
+
+RUN $useProxyNetwork pip3 install gradio openai numpy arxiv rich -i https://pypi.douban.com/simple/
+RUN $useProxyNetwork pip3 install colorama Markdown pygments pymupdf -i https://pypi.douban.com/simple/
+
 # 装载项目文件
 COPY . .
 
-ARG useProxyNetwork=''
-
-
-# # # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除
-# RUN apt-get update
-# RUN apt-get install -y curl proxychains
-# RUN $useProxyNetwork curl cip.cc
-# RUN sed -i '$ d' /etc/proxychains.conf
-# RUN sed -i '$ d' /etc/proxychains.conf
-# RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf
-# ARG useProxyNetwork=proxychains
-# # # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除
-
-
 
 # 安装依赖
-RUN $useProxyNetwork pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
+RUN $useProxyNetwork pip3 install -r requirements.txt -i https://pypi.douban.com/simple/
 
 # 可选步骤，用于预热模块
 RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'

From 8ef734410160f2b8090a2ec10b15069ee60da9b7 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Tue, 6 Jun 2023 18:57:52 +0800
Subject: [PATCH 04/78] fix subprocess bug in Windows

---
 crazy_functions/latex_utils.py | 288 +++++++++++++++++----------------
 1 file changed, 145 insertions(+), 143 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 15dfebc..d3d7b9c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -192,6 +192,149 @@ def fix_content(final_tex, node_string):
             final_tex = node_string # 出问题了，还原原文
     return final_tex
 
+def split_subprocess(txt, project_folder, return_dict):
+    """
+    Break the LaTeX text down into a linked list of nodes; each node carries a
+    preserve flag that indicates whether it should be processed by GPT.
+    The node list and the non-preserved segments are stored in return_dict.
+    """
+    text = txt
+    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
+
+    # absorb everything above the title and the authors
+    text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL)
+    # remove iffalse comments
+    text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
+    # absorb begin-end pairs that span at most 25 lines
+    text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
+    # absorb anonymous formulas
+    text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
+    # absorb other miscellaneous items
+    text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\item ")
+    text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
+    # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+    root = convert_to_linklist(text, mask)
+
+    # fix braces: split a non-preserved node at the first unmatched closing brace
+    node = root
+    while True:
+        string = node.string
+        if node.preserve: 
+            node = node.next
+            if node is None: break
+            continue
+        def break_check(string):
+            str_stack = [""] # (lv, index)
+            for i, c in enumerate(string):
+                if c == '{':
+                    str_stack.append('{')
+                elif c == '}':
+                    if len(str_stack) == 1:
+                        print('stack fix')
+                        return i
+                    str_stack.pop(-1)
+                else:
+                    str_stack[-1] += c
+            return -1
+        bp = break_check(string)
+
+        if bp == -1:
+            pass
+        elif bp == 0:
+            node.string = string[:1]
+            q = LinkedListNode(string[1:], False)
+            q.next = node.next
+            node.next = q
+        else:
+            node.string = string[:bp]
+            q = LinkedListNode(string[bp:], False)
+            q.next = node.next
+            node.next = q
+
+        node = node.next
+        if node is None: break
+
+    # mask empty lines and sentences that are too short
+    node = root
+    while True:
+        if len(node.string.strip('\n').strip(''))==0: node.preserve = True  # NOTE(review): .strip('') strips nothing - was .strip() intended?
+        if len(node.string.strip('\n').strip(''))<42: node.preserve = True
+        node = node.next
+        if node is None: break
+    node = root
+    while True:
+        if node.next and node.preserve and node.next.preserve:
+            node.string += node.next.string
+            node.next = node.next.next
+        node = node.next  # NOTE(review): advances even after a merge, so the merged node is not re-checked against its new successor
+        if node is None: break
+
+    # detach leading and trailing line breaks
+    node = root
+    prev_node = None
+    while True:
+        if not node.preserve:
+            lstriped_ = node.string.lstrip().lstrip('\n')
+            if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
+                prev_node.string += node.string[:-len(lstriped_)]
+                node.string = lstriped_
+            rstriped_ = node.string.rstrip().rstrip('\n')
+            if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
+                node.next.string = node.string[len(rstriped_):] + node.next.string
+                node.string = rstriped_
+        # =====
+        prev_node = node
+        node = node.next
+        if node is None: break
+
+    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
+        segment_parts_for_gpt = []
+        nodes = []
+        node = root
+        while True:
+            nodes.append(node)
+            show_html = node.string.replace('\n','<br/>')
+            if not node.preserve:
+                segment_parts_for_gpt.append(node.string)
+                f.write(f'<p style="color:black;">#{show_html}#</p>')
+            else:
+                f.write(f'<p style="color:red;">{show_html}</p>')
+            node = node.next
+            if node is None: break
+
+    for n in nodes: n.next = None   # break
+    return_dict['nodes'] = nodes
+    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
+    return return_dict
+
+
 
 class LatexPaperSplit():
     """
@@ -237,156 +380,15 @@ class LatexPaperSplit():
         manager = multiprocessing.Manager()
         return_dict = manager.dict()
         p = multiprocessing.Process(
-            target=lambda lps, txt, project_folder, return_dict: 
-            lps.split_subprocess(txt, project_folder, return_dict), 
-            args=(self, txt, project_folder, return_dict))
+            target=split_subprocess, 
+            args=(txt, project_folder, return_dict))
         p.start()
         p.join()
         self.nodes = return_dict['nodes']
         self.sp = return_dict['segment_parts_for_gpt']
         return self.sp
 
-    def split_subprocess(self, txt, project_folder, return_dict):
-        """
-        break down latex file to a linked list,
-        each node use a preserve flag to indicate whether it should
-        be proccessed by GPT.
-        """
-        text = txt
-        mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
 
-        # 吸收title与作者以上的部分
-        text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL)
-        # 删除iffalse注释
-        text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
-        # 吸收在25行以内的begin-end组合
-        text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
-        # 吸收匿名公式
-        text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
-        # 吸收其他杂项
-        text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
-        text, mask = split_worker(text, mask, r"\\item ")
-        text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
-        text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-        # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
-        root = convert_to_linklist(text, mask)
-
-        # 修复括号
-        node = root
-        while True:
-            string = node.string
-            if node.preserve: 
-                node = node.next
-                if node is None: break
-                continue
-            def break_check(string):
-                str_stack = [""] # (lv, index)
-                for i, c in enumerate(string):
-                    if c == '{':
-                        str_stack.append('{')
-                    elif c == '}':
-                        if len(str_stack) == 1:
-                            print('stack fix')
-                            return i
-                        str_stack.pop(-1)
-                    else:
-                        str_stack[-1] += c
-                return -1
-            bp = break_check(string)
-
-            if bp == -1:
-                pass
-            elif bp == 0:
-                node.string = string[:1]
-                q = LinkedListNode(string[1:], False)
-                q.next = node.next
-                node.next = q
-            else:
-                node.string = string[:bp]
-                q = LinkedListNode(string[bp:], False)
-                q.next = node.next
-                node.next = q
-
-            node = node.next
-            if node is None: break
-
-        # 屏蔽空行和太短的句子
-        node = root
-        while True:
-            if len(node.string.strip('\n').strip(''))==0: node.preserve = True
-            if len(node.string.strip('\n').strip(''))<42: node.preserve = True
-            node = node.next
-            if node is None: break
-        node = root
-        while True:
-            if node.next and node.preserve and node.next.preserve:
-                node.string += node.next.string
-                node.next = node.next.next
-            node = node.next
-            if node is None: break
-
-        # 将前后断行符脱离
-        node = root
-        prev_node = None
-        while True:
-            if not node.preserve:
-                lstriped_ = node.string.lstrip().lstrip('\n')
-                if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
-                    prev_node.string += node.string[:-len(lstriped_)]
-                    node.string = lstriped_
-                rstriped_ = node.string.rstrip().rstrip('\n')
-                if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
-                    node.next.string = node.string[len(rstriped_):] + node.next.string
-                    node.string = rstriped_
-            # =====
-            prev_node = node
-            node = node.next
-            if node is None: break
-
-        with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
-            segment_parts_for_gpt = []
-            nodes = []
-            node = root
-            while True:
-                nodes.append(node)
-                show_html = node.string.replace('\n','<br/>')
-                if not node.preserve:
-                    segment_parts_for_gpt.append(node.string)
-                    f.write(f'<p style="color:black;">#{show_html}#</p>')
-                else:
-                    f.write(f'<p style="color:red;">{show_html}</p>')
-                node = node.next
-                if node is None: break
-
-        for n in nodes: n.next = None   # break
-        return_dict['nodes'] = nodes
-        return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
-        return return_dict
 
 class LatexPaperFileGroup():
     """

From 9aafb2ee479f067ac4b6a955a7e43a1d0c553f50 Mon Sep 17 00:00:00 2001
From: MengDanzz <95761983+MengDanzz@users.noreply.github.com>
Date: Wed, 7 Jun 2023 09:18:57 +0800
Subject: [PATCH 05/78] =?UTF-8?q?=E9=9D=9Epypi=E5=8C=85=E5=8A=A0=E5=85=A5C?=
 =?UTF-8?q?OPY?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index aa4eee8..77f4188 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,7 @@ WORKDIR /gpt
 
 # 安装依赖
 COPY requirements.txt ./
+COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
 RUN pip3 install -r requirements.txt
 # 装载项目文件
 COPY . .

From dae65fd2c293cb4c4c8370ce962d5038a24378ce Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:43:45 +0800
Subject: [PATCH 06/78] =?UTF-8?q?=E5=9C=A8copy=20..=E5=90=8E=E5=9C=A8?=
 =?UTF-8?q?=E8=BF=90=E8=A1=8C=E4=B8=80=E6=AC=A1pip=20install=E6=A3=80?=
 =?UTF-8?q?=E6=9F=A5=E4=BE=9D=E8=B5=96=E5=8F=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 77f4188..97ad13d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,6 +19,7 @@ COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
 RUN pip3 install -r requirements.txt
 # 装载项目文件
 COPY . .
+RUN pip3 install -r requirements.txt
 
 # 可选步骤，用于预热模块
 RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'

From 149db621ec812fd2341fe3060b80ee210a81e528 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Wed, 7 Jun 2023 11:09:12 +0800
Subject: [PATCH 07/78] langchain check depends

---
 crazy_functions/Langchain知识库.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py
index 36999d5..5b09d3b 100644
--- a/crazy_functions/Langchain知识库.py
+++ b/crazy_functions/Langchain知识库.py
@@ -75,9 +75,18 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
 
 @CatchException
 def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
+    # resolve deps
+    try:
+        from zh_langchain import construct_vector_store
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        from .crazy_utils import knowledge_archive_interface
+    except Exception as e:
+        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装，请查看终端的输出或耐心等待..."])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        from .crazy_utils import try_install_deps
+        try_install_deps(['zh_langchain==0.2.0'])
 
     # < -------------------  --------------- >
-    from .crazy_utils import knowledge_archive_interface
     kai = knowledge_archive_interface()
 
     if 'langchain_plugin_embedding' in chatbot._cookies:

From 77cc141a8227ee78a936b57de970cd74b89495e3 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:14:02 +0800
Subject: [PATCH 08/78] Update README.md

---
 README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02f047d..c671477 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
 [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL，让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)，再回答问题，让信息永不过时
+Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，迄今为止最好的论文翻译工具
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png)，支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
@@ -285,11 +286,18 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" width="500" >
 </div>
+<div align="center">
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" width="200">
+</div>
 
+10. Latex/Arxiv论文翻译功能
+<div align="center">
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" width="500" >
+</div>
 
 ## 版本:
 - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件（高优先级）
-- version 3.4(Todo): 完善chatglm本地大模型的多线支持
+- version 3.4: +arxiv论文翻译、latex论文批改功能
 - version 3.3: +互联网信息综合功能
 - version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合)
 - version 3.1: 支持同时问询多个gpt模型！支持api2d，支持多个apikey负载均衡

From e2de1d76ea9c2747b6ed0c5a90abc8863893bf20 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:18:31 +0800
Subject: [PATCH 09/78] Update README.md

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c671477..d72a29f 100644
--- a/README.md
+++ b/README.md
@@ -292,7 +292,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 10. Latex/Arxiv论文翻译功能
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" width="500" >
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="200" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="200" >
 </div>
 
 ## 版本:

From 10b3001dba7cde9ac6b8934eac287380f5ba16cf Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:19:11 +0800
Subject: [PATCH 10/78] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d72a29f..8ada026 100644
--- a/README.md
+++ b/README.md
@@ -292,8 +292,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 10. Latex/Arxiv论文翻译功能
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="200" > ===>
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="200" >
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="300" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="300" >
 </div>
 
 ## 版本:

From ce6f11d2003864edc0fb22051403791c0cbba5b3 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:20:49 +0800
Subject: [PATCH 11/78] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 8ada026..40125ac 100644
--- a/README.md
+++ b/README.md
@@ -292,8 +292,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 10. Latex/Arxiv论文翻译功能
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="300" > ===>
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="300" >
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
 </div>
 
 ## 版本:

From a0ea5d0e9e7abd951e92162f457722e816848b62 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:22:03 +0800
Subject: [PATCH 12/78] Update README.md

---
 README.md | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 40125ac..289cf06 100644
--- a/README.md
+++ b/README.md
@@ -284,15 +284,13 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 10. Latex全文校对纠错
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" width="500" >
-</div>
-<div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" width="200">
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="250" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="250">
 </div>
 
 10. Latex/Arxiv论文翻译功能
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" >
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
 </div>
 

From f9226d92be881faa2bea7e42f75c467b6ea2f7dd Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:24:14 +0800
Subject: [PATCH 13/78] Update version

---
 version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/version b/version
index ad75b2c..669c708 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.37,
+  "version": 3.4,
   "show_feature": true,
-  "new_feature": "修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持 <-> 提供复旦MOSS模型适配（启用需额外依赖） <-> 提供docker-compose方案兼容LLAMA盘古RWKV等模型的后端 <-> 新增Live2D装饰 <-> 完善对话历史的保存/载入/删除 <-> 保存对话功能"
+  "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
 }

From ff5403eac6e615c74a991dfecd93f9a5a12036d4 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:42:24 +0800
Subject: [PATCH 14/78] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 289cf06..d121116 100644
--- a/README.md
+++ b/README.md
@@ -284,8 +284,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 10. Latex全文校对纠错
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="250" > ===>
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="250">
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
 </div>
 
 10. Latex/Arxiv论文翻译功能

From f30c9c6d3bf34f63b82e59c031220124c0e1c35d Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:43:13 +0800
Subject: [PATCH 15/78] Update README.md

---
 README.md | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index d121116..d4526c9 100644
--- a/README.md
+++ b/README.md
@@ -233,27 +233,31 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
 </div>
 
+2. Latex/Arxiv论文翻译功能
+<div align="center">
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" >
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
+</div>
 
-
-2. 生成报告。大部分插件都会在执行结束后，生成工作报告
+3. 生成报告。大部分插件都会在执行结束后，生成工作报告
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/227503770-fe29ce2c-53fd-47b0-b0ff-93805f0c2ff4.png" height="300" >
 <img src="https://user-images.githubusercontent.com/96192199/227504617-7a497bb3-0a2a-4b50-9a8a-95ae60ea7afd.png" height="300" >
 <img src="https://user-images.githubusercontent.com/96192199/227504005-efeaefe0-b687-49d0-bf95-2d7b7e66c348.png" height="300" >
 </div>
 
-3. 模块化功能设计，简单的接口却能支持强大的功能
+4. 模块化功能设计，简单的接口却能支持强大的功能
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/229288270-093643c1-0018-487a-81e6-1d7809b6e90f.png" height="400" >
 <img src="https://user-images.githubusercontent.com/96192199/227504931-19955f78-45cd-4d1c-adac-e71e50957915.png" height="400" >
 </div>
 
-4. 这是一个能够“自我译解”的开源项目
+5. 这是一个能够“自我译解”的开源项目
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226936850-c77d7183-0749-4c1c-9875-fd4891842d0c.png" width="500" >
 </div>
 
-5. 译解其他开源项目，不在话下
+6. 译解其他开源项目，不在话下
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="500" >
 </div>
@@ -262,37 +266,33 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 <img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" width="500" >
 </div>
 
-6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能（默认关闭，需要修改`config.py`）
+7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能（默认关闭，需要修改`config.py`）
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
 </div>
 
-7. 新增MOSS大语言模型支持
+8. 新增MOSS大语言模型支持
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/236639178-92836f37-13af-4fdd-984d-b4450fe30336.png" width="500" >
 </div>
 
-8. OpenAI图像生成
+9. OpenAI图像生成
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
 </div>
 
-9. OpenAI音频解析与总结
+10. OpenAI音频解析与总结
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/709ccf95-3aee-498a-934a-e1c22d3d5d5b" width="500" >
 </div>
 
-10. Latex全文校对纠错
+11. Latex全文校对纠错
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
 </div>
 
-10. Latex/Arxiv论文翻译功能
-<div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" >
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
-</div>
+
 
 ## 版本:
 - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件（高优先级）

From b52695845e181399fb0b5607b26125a8070dd1e6 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:44:05 +0800
Subject: [PATCH 16/78] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d4526c9..2045942 100644
--- a/README.md
+++ b/README.md
@@ -235,7 +235,7 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 2. Latex/Arxiv论文翻译功能
 <div align="center">
-<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" >
+<img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" > ===>
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
 </div>
 

From 110510997f9018d206c05384ccb7526ae9b96db2 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:48:52 +0800
Subject: [PATCH 17/78] Update README.md

---
 README.md | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 2045942..d4d6858 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
 [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL，让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)，再回答问题，让信息永不过时
-Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，迄今为止最好的论文翻译工具
+⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，迄今为止最好的论文翻译工具⭐
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png)，支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
@@ -233,7 +233,7 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
 </div>
 
-2. Latex/Arxiv论文翻译功能
+2. ⭐Latex/Arxiv论文翻译功能⭐
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/002a1a75-ace0-4e6a-94e2-ec1406a746f1" height="250" > ===>
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/9fdcc391-f823-464f-9322-f8719677043b" height="250" >
@@ -241,9 +241,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 3. 生成报告。大部分插件都会在执行结束后，生成工作报告
 <div align="center">
-<img src="https://user-images.githubusercontent.com/96192199/227503770-fe29ce2c-53fd-47b0-b0ff-93805f0c2ff4.png" height="300" >
-<img src="https://user-images.githubusercontent.com/96192199/227504617-7a497bb3-0a2a-4b50-9a8a-95ae60ea7afd.png" height="300" >
-<img src="https://user-images.githubusercontent.com/96192199/227504005-efeaefe0-b687-49d0-bf95-2d7b7e66c348.png" height="300" >
+<img src="https://user-images.githubusercontent.com/96192199/227503770-fe29ce2c-53fd-47b0-b0ff-93805f0c2ff4.png" height="250" >
+<img src="https://user-images.githubusercontent.com/96192199/227504617-7a497bb3-0a2a-4b50-9a8a-95ae60ea7afd.png" height="250" >
 </div>
 
 4. 模块化功能设计，简单的接口却能支持强大的功能
@@ -259,11 +258,8 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 6. 译解其他开源项目，不在话下
 <div align="center">
-<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" width="500" >
-</div>
-
-<div align="center">
-<img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" width="500" >
+<img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" height="250" >
+<img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" height="250" >
 </div>
 
 7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能（默认关闭，需要修改`config.py`）

From e48d92e82e9634b5194947567bf7512a346d3343 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 8 Jun 2023 18:34:06 +0800
Subject: [PATCH 18/78] update translation

---
 docs/translate_english.json | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/translate_english.json b/docs/translate_english.json
index d9968c6..57e008b 100644
--- a/docs/translate_english.json
+++ b/docs/translate_english.json
@@ -58,6 +58,8 @@
     "连接网络回答问题": "ConnectToNetworkToAnswerQuestions",
     "联网的ChatGPT": "ChatGPTConnectedToNetwork",
     "解析任意code项目": "ParseAnyCodeProject",
+    "读取知识库作答": "ReadKnowledgeArchiveAnswerQuestions",
+    "知识库问答": "UpdateKnowledgeArchive",
     "同时问询_指定模型": "InquireSimultaneously_SpecifiedModel",
     "图片生成": "ImageGeneration",
     "test_解析ipynb文件": "Test_ParseIpynbFile",

From ef1bfdd60f6b7c23bb23406cd8e0603f51f81165 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Thu, 8 Jun 2023 21:29:10 +0800
Subject: [PATCH 19/78] update pip install notice

---
 crazy_functions/数学动画生成manim.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crazy_functions/数学动画生成manim.py b/crazy_functions/数学动画生成manim.py
index 5851b9c..26e61b1 100644
--- a/crazy_functions/数学动画生成manim.py
+++ b/crazy_functions/数学动画生成manim.py
@@ -8,7 +8,7 @@ def inspect_dependency(chatbot, history):
         import manim
         return True
     except:
-        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖，安装方法:```pip install manimgl```"])
+        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖，安装方法:```pip install manim manimgl```"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return False
 

From 3c00e7a143f4c619166d4821d9804ef8aa0c5848 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Sat, 10 Jun 2023 21:45:38 +0800
Subject: [PATCH 20/78] file link in chatbot

---
 toolbox.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/toolbox.py b/toolbox.py
index 18915d0..4b0e1dd 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -483,7 +483,9 @@ def on_report_generated(files, chatbot):
     if len(report_files) == 0:
         return None, chatbot
     # files.extend(report_files)
-    chatbot.append(['报告如何远程获取？', '报告已经添加到右侧“文件上传区”（可能处于折叠状态），请查收。'])
+    file_links = ''
+    for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
+    chatbot.append(['报告如何远程获取？', f'报告已经添加到右侧“文件上传区”（可能处于折叠状态），请查收。{file_links}'])
     return report_files, chatbot
 
 def is_openai_api_key(key):

From ce0d8b9dab677dfb8e46429a58eeec2ef965b0ab Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 11 Jun 2023 01:36:23 +0800
Subject: [PATCH 21/78] =?UTF-8?q?=E8=99=9A=E7=A9=BA=E7=BB=88=E7=AB=AF?=
 =?UTF-8?q?=E6=8F=92=E4=BB=B6=E9=9B=8F=E5=BD=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functional.py         |  16 ++++-
 crazy_functions/虚空终端.py | 131 ++++++++++++++++++++++++++++++++++++
 toolbox.py                  |  16 +++--
 3 files changed, 158 insertions(+), 5 deletions(-)
 create mode 100644 crazy_functions/虚空终端.py

diff --git a/crazy_functional.py b/crazy_functional.py
index d8ca9ae..2f0fbaa 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -368,5 +368,19 @@ def get_crazy_functions():
         })
     except:
         print('Load function plugin failed')
-    ###################### 第n组插件 ###########################
+
+    try:
+        from crazy_functions.虚空终端 import 终端
+        function_plugins.update({
+            "超级终端": {
+                "Color": "stop",
+                "AsButton": False,
+                # "AdvancedArgs": True,
+                # "ArgsReminder": "",
+                "Function": HotReload(终端)
+            }
+        })
+    except:
+        print('Load function plugin failed')
+
     return function_plugins
diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py
new file mode 100644
index 0000000..fe71a46
--- /dev/null
+++ b/crazy_functions/虚空终端.py
@@ -0,0 +1,131 @@
+from toolbox import CatchException, update_ui, gen_time_str
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+from .crazy_utils import input_clipping
+
+
+prompt = """
+I have to achieve some functionalities by calling one of the functions below.
+Your job is to find the correct funtion to use to satisfy my requirement,
+and then write python code to call this function with correct parameters.
+
+These are functions you are allowed to choose from:
+1. 
+    功能描述: 总结音视频内容
+    调用函数: ConcludeAudioContent(txt, llm_kwargs)
+    参数说明: 
+            txt: 音频文件的路径
+            llm_kwargs: 模型参数, 永远给定None
+2. 
+    功能描述: 将每次对话记录写入Markdown格式的文件中
+    调用函数: WriteMarkdown()
+3.
+    功能描述: 将指定目录下的PDF文件从英文翻译成中文
+    调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs)
+    参数说明: 
+            txt: PDF文件所在的路径
+            llm_kwargs: 模型参数, 永远给定None
+4.
+    功能描述: 根据文本使用GPT模型生成相应的图像
+    调用函数: ImageGeneration(txt, llm_kwargs)
+    参数说明: 
+            txt: 图像生成所用到的提示文本
+            llm_kwargs: 模型参数, 永远给定None
+5.
+    功能描述: 对输入的word文档进行摘要生成 
+    调用函数: SummarizingWordDocuments(input_path, output_path)
+    参数说明: 
+            input_path: 待处理的word文档路径
+            output_path: 摘要生成后的文档路径
+
+
+You should always anwser with following format:
+----------------
+Code:
+```
+class AutoAcademic(object):
+    def __init__(self):
+        self.selected_function = "FILL_CORRECT_FUNCTION_HERE"      # e.g., "GenerateImage"
+        self.txt = "FILL_MAIN_PARAMETER_HERE"      # e.g., "荷叶上的蜻蜓"
+        self.llm_kwargs = None
+```
+Explanation:
+只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。
+----------------
+
+Now, this is my requirement: 
+
+"""
+def get_fn_lib():
+    return {
+        "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程",  "批量翻译PDF文档"),
+        "SummarizingWordDocuments": ("crazy_functions.总结word文档",  "总结word文档"),
+        "ImageGeneration": ("crazy_functions.图片生成",  "图片生成"),
+        "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译",  "Markdown中译英"),
+        "SummaryAudioVideo": ("crazy_functions.总结音视频",  "总结音视频"),
+    }
+
+def inspect_dependency(chatbot, history):
+    return True
+
+def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    import subprocess, sys, os, shutil, importlib
+
+    with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f:
+        f.write(code)
+
+    try:
+        AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic')
+        # importlib.reload(AutoAcademic)
+        auto_dict = AutoAcademic()
+        selected_function = auto_dict.selected_function
+        txt = auto_dict.txt
+        fp, fn = get_fn_lib()[selected_function]
+        fn_plugin = getattr(importlib.import_module(fp, fn), fn)
+        yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
+    except:
+        from toolbox import trimmed_format_exc
+        chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+def get_code_block(reply):
+    import re
+    pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
+    matches = re.findall(pattern, reply) # find all code blocks in text
+    if len(matches) != 1: 
+        raise RuntimeError("GPT is not generating proper code.")
+    return matches[0].strip('python') #  code block
+
+@CatchException
+def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    txt             输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径
+    llm_kwargs      gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
+    plugin_kwargs   插件模型的参数, 暂时没有用武之地
+    chatbot         聊天显示框的句柄, 用于显示给用户
+    history         聊天历史, 前情提要
+    system_prompt   给gpt的静默提醒
+    web_port        当前软件运行的端口号
+    """
+    # 清空历史, 以免输入溢出
+    history = []    
+
+    # 基本信息：功能、贡献者
+    chatbot.append(["函数插件功能？", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."])
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+    # # 尝试导入依赖, 如果缺少依赖, 则给出安装建议
+    # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面
+    # if not dep_ok: return
+    
+    # 输入
+    i_say = prompt + txt
+    # 开始
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=txt, 
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], 
+        sys_prompt=""
+    )
+
+    # Extract the generated code block from the reply and execute it
+    code = get_code_block(gpt_say)
+    yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
diff --git a/toolbox.py b/toolbox.py
index 4b0e1dd..4ab1116 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -221,16 +221,21 @@ def text_divide_paragraph(text):
     """
     将文本按照段落分隔符分割开，生成带有段落标签的HTML代码。
     """
+    pre = '<div class="markdown-body">'
+    suf = '</div>'
+    if text.startswith(pre) and text.endswith(suf):
+        return text
+    
     if '```' in text:
         # careful input
-        return text
+        return pre + text + suf
     else:
         # wtf input
         lines = text.split("\n")
         for i, line in enumerate(lines):
             lines[i] = lines[i].replace(" ", "&nbsp;")
         text = "</br>".join(lines)
-        return text
+        return pre + text + suf
 
 @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
 def markdown_convertion(txt):
@@ -342,8 +347,11 @@ def format_io(self, y):
     if y is None or y == []:
         return []
     i_ask, gpt_reply = y[-1]
-    i_ask = text_divide_paragraph(i_ask)  # 输入部分太自由，预处理一波
-    gpt_reply = close_up_code_segment_during_stream(gpt_reply)  # 当代码输出半截的时候，试着补上后个```
+    # 输入部分太自由，预处理一波
+    if i_ask is not None: i_ask = text_divide_paragraph(i_ask)
+    # 当代码输出半截的时候，试着补上后个```
+    if gpt_reply is not None: gpt_reply = close_up_code_segment_during_stream(gpt_reply)
+    # process
     y[-1] = (
         None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']),
         None if gpt_reply is None else markdown_convertion(gpt_reply)

From aeddf6b461d58eb7c755b1ed1d8ce2810cdf752f Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 11 Jun 2023 10:20:49 +0800
Subject: [PATCH 22/78] =?UTF-8?q?Update=20Latex=E8=BE=93=E5=87=BAPDF?=
 =?UTF-8?q?=E7=BB=93=E6=9E=9C.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/Latex输出PDF结果.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 855cc1c..6592c9a 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -146,7 +146,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
         from .latex_utils import Latex精细分解与转化, 编译Latex
     except Exception as e:
         chatbot.append([ f"解析项目: {txt}",
-            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
+            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     
@@ -216,7 +216,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         from .latex_utils import Latex精细分解与转化, 编译Latex
     except Exception as e:
         chatbot.append([ f"解析项目: {txt}",
-            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
+            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     

From 3ecf2977a86abaf49ddaf112a196bc7f8fcb6717 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Sun, 11 Jun 2023 18:23:54 +0800
Subject: [PATCH 23/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcaption=E7=BF=BB?=
 =?UTF-8?q?=E8=AF=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 59 +++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index d3d7b9c..afaae22 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -29,7 +29,15 @@ def split_worker_reverse_caption(text, mask, pattern, flags=0):
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
-        mask[res.regs[1][0]:res.regs[1][1]] = TRANSFORM
+        brace_level = 0
+        p = begin = end = res.regs[1][0]
+        for _ in range(1024*16):
+            if text[p] == '}' and brace_level == 0: break
+            elif text[p] == '}':  brace_level -= 1
+            elif text[p] == '{':  brace_level += 1
+            p += 1
+        end = p
+        mask[begin:end] = TRANSFORM
     return text, mask
 
 def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
@@ -97,6 +105,7 @@ def 寻找Latex主文件(file_manifest, mode):
         else:
             continue
     raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
+
 def rm_comments(main_file):
     new_file_remove_comment_lines = []
     for l in main_file.splitlines():
@@ -108,6 +117,7 @@ def rm_comments(main_file):
     main_file = '\n'.join(new_file_remove_comment_lines)
     main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # 使用正则表达式查找半行注释, 并替换为空字符串
     return main_file
+
 def merge_tex_files_(project_foler, main_file, mode):
     """
     Merge Tex project recrusively
@@ -185,14 +195,39 @@ def fix_content(final_tex, node_string):
     if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
         # walk and replace any _ without \
         final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
-    if node_string.count('{') != node_string.count('}'):
-        if final_tex.count('{') != node_string.count('{'):
-            final_tex = node_string # 出问题了，还原原文
-        if final_tex.count('}') != node_string.count('}'):
-            final_tex = node_string # 出问题了，还原原文
+
+    def compute_brace_level(string):
+        # this function computes the net brace level: the number of { minus the number of }
+        brace_level = 0
+        for c in string:
+            if c == "{": brace_level += 1
+            elif c == "}": brace_level -= 1
+        return brace_level
+    def join_most(tex_t, tex_o):
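+        # this function joins the translated string and the original string when something goes wrong: it keeps the translated text up to the last brace matched in order against the original, then appends the rest of the original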
+        p_t = 0
+        p_o = 0
+        def find_next(string, chars, begin):
+            p = begin
+            while p < len(string):
+                if string[p] in chars: return p, string[p]
+                p += 1
+            return None, None
+        while True:
+            res1, char = find_next(tex_o, ['{','}'], p_o)
+            if res1 is None: break
+            res2, char = find_next(tex_t, [char], p_t)
+            if res2 is None: break
+            p_o = res1 + 1
+            p_t = res2 + 1
+        return tex_t[:p_t] + tex_o[p_o:]
+
+    if compute_brace_level(final_tex) != compute_brace_level(node_string):
+        # 出问题了，还原部分原文，保证括号正确
+        final_tex = join_most(final_tex, node_string)
     return final_tex
 
-def split_subprocess(txt, project_folder, return_dict):
+def split_subprocess(txt, project_folder, return_dict, opts):
     """
     break down latex file to a linked list,
     each node use a preserve flag to indicate whether it should
@@ -239,7 +274,7 @@ def split_subprocess(txt, project_folder, return_dict):
     text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-    # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+    text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
     root = convert_to_linklist(text, mask)
 
     # 修复括号
@@ -369,7 +404,7 @@ class LatexPaperSplit():
             result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
         return result_string
 
-    def split(self, txt, project_folder): 
+    def split(self, txt, project_folder, opts): 
         """
         break down latex file to a linked list,
         each node use a preserve flag to indicate whether it should
@@ -381,7 +416,7 @@ class LatexPaperSplit():
         return_dict = manager.dict()
         p = multiprocessing.Process(
             target=split_subprocess, 
-            args=(txt, project_folder, return_dict))
+            args=(txt, project_folder, return_dict, opts))
         p.start()
         p.join()
         self.nodes = return_dict['nodes']
@@ -440,7 +475,7 @@ class LatexPaperFileGroup():
 
 
 
-def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None):
+def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
     import time, os, re
     from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
     from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
@@ -470,7 +505,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
 
     #  <-------- 精细切分latex文件 ----------> 
     lps = LatexPaperSplit()
-    res = lps.split(merged_content, project_folder) # 消耗时间的函数
+    res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
 
     #  <-------- 拆分过长的latex片段 ----------> 
     pfg = LatexPaperFileGroup()

From 790a1cf12a2a98811ccb4c38568f21b120049f7a Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Sun, 11 Jun 2023 20:12:25 +0800
Subject: [PATCH 24/78] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=B8=80=E4=BA=9B?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index afaae22..89ca7a5 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -23,9 +23,26 @@ def split_worker(text, mask, pattern, flags=0):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
-def split_worker_reverse_caption(text, mask, pattern, flags=0):
+def split_worker_careful_brace(text, mask, pattern, flags=0):
     """
-    Move caption area out of preserve area 
+    Mark the span from the start of each match through the closing brace that balances its first opening brace as preserve area
+    """
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        brace_level = -1
+        p = begin = end = res.regs[0][0]
+        for _ in range(1024*16):
+            if text[p] == '}' and brace_level == 0: break
+            elif text[p] == '}':  brace_level -= 1
+            elif text[p] == '{':  brace_level += 1
+            p += 1
+        end = p+1
+        mask[begin:end] = PRESERVE
+    return text, mask
+
+def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
+    """
+    Move area out of preserve area 
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
@@ -274,7 +291,8 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-    text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+    text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
+    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
     root = convert_to_linklist(text, mask)
 
     # 修复括号
@@ -504,6 +522,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
         f.write(merged_content)
 
     #  <-------- 精细切分latex文件 ----------> 
+    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件，这需要一段时间计算，文档越长耗时越长，请耐心等待。'))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     lps = LatexPaperSplit()
     res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
 
@@ -602,7 +622,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
     current_dir = os.getcwd()
     n_fix = 1
     max_try = 32
-    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}，如果程序停顿5分钟以上，则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果，或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
+    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}，如果程序停顿5分钟以上，请直接去该路径下取回翻译结果，或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
     chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
     yield from update_ui_lastest_msg('编译已经开始...', chatbot, history)   # 刷新Gradio前端界面
 

From 9fd212652ed0e80d3e55b9b72461fc24d3837ce1 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Mon, 12 Jun 2023 09:45:59 +0800
Subject: [PATCH 25/78] =?UTF-8?q?=E4=B8=93=E4=B8=9A=E8=AF=8D=E6=B1=87?=
 =?UTF-8?q?=E5=A3=B0=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/Latex输出PDF结果.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 6592c9a..2e9a30b 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -5,7 +5,7 @@ pj = os.path.join
 ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
 
 # =================================== 工具函数 ===============================================
-沙雕GPT啊别犯这些低级翻译错误  = 'You must to translate "agent" to "智能体". '
+专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
 def switch_prompt(pfg, mode):
     """
     Generate prompts and system prompts based on the mode for proofreading or translating.
@@ -25,7 +25,7 @@ def switch_prompt(pfg, mode):
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]
         sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
     elif mode == 'translate_zh':
-        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese." + 沙雕GPT啊别犯这些低级翻译错误 + 
+        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + 
                         r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + 
                         r"Answer me only with the translated text:" + 
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]

From 9ac3d0d65dfd1ae4209a28139d133057a3c22e39 Mon Sep 17 00:00:00 2001
From: OverKit <78402478+OverKit@users.noreply.github.com>
Date: Mon, 12 Jun 2023 10:09:52 +0800
Subject: [PATCH 26/78] check letter % after removing spaces or tabs in the
 left

---
 crazy_functions/latex_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 89ca7a5..53894ca 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -127,7 +127,7 @@ def rm_comments(main_file):
     new_file_remove_comment_lines = []
     for l in main_file.splitlines():
         # 删除整行的空注释
-        if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")):
+        if l.lstrip().startswith("%"):
             pass
         else:
             new_file_remove_comment_lines.append(l)

From c365ea9f579acb88ffd756c3483c5c29fc2b57c3 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Tue, 13 Jun 2023 16:13:19 +0800
Subject: [PATCH 27/78] Update README.md

---
 README.md | 47 ++++++++++++++++++++++-------------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index d4d6858..581d3d5 100644
--- a/README.md
+++ b/README.md
@@ -228,7 +228,7 @@ docker-compose up
 
 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件，
 另外在函数插件区（下拉菜单）调用 `载入对话历史存档` ，即可还原之前的会话。
-Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存，点击 `删除所有本地对话历史记录` 可以删除所有html存档缓存。
+Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
 </div>
@@ -251,38 +251,33 @@ Tip：不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 <img src="https://user-images.githubusercontent.com/96192199/227504931-19955f78-45cd-4d1c-adac-e71e50957915.png" height="400" >
 </div>
 
-5. 这是一个能够“自我译解”的开源项目
-<div align="center">
-<img src="https://user-images.githubusercontent.com/96192199/226936850-c77d7183-0749-4c1c-9875-fd4891842d0c.png" width="500" >
-</div>
-
-6. 译解其他开源项目，不在话下
+5. 译解其他开源项目
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/226935232-6b6a73ce-8900-4aee-93f9-733c7e6fef53.png" height="250" >
 <img src="https://user-images.githubusercontent.com/96192199/226969067-968a27c1-1b9c-486b-8b81-ab2de8d3f88a.png" height="250" >
 </div>
 
-7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能（默认关闭，需要修改`config.py`）
+6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能（默认关闭，需要修改`config.py`）
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/236432361-67739153-73e8-43fe-8111-b61296edabd9.png" width="500" >
 </div>
 
-8. 新增MOSS大语言模型支持
+7. 新增MOSS大语言模型支持
 <div align="center">
 <img src="https://user-images.githubusercontent.com/96192199/236639178-92836f37-13af-4fdd-984d-b4450fe30336.png" width="500" >
 </div>
 
-9. OpenAI图像生成
+8. OpenAI图像生成
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/bc7ab234-ad90-48a0-8d62-f703d9e74665" width="500" >
 </div>
 
-10. OpenAI音频解析与总结
+9. OpenAI音频解析与总结
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/709ccf95-3aee-498a-934a-e1c22d3d5d5b" width="500" >
 </div>
 
-11. Latex全文校对纠错
+10. Latex全文校对纠错
 <div align="center">
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/651ccd98-02c9-4464-91e1-77a6b7d1b033" height="200" > ===>
 <img src="https://github.com/binary-husky/gpt_academic/assets/96192199/476f66d9-7716-4537-b5c1-735372c25adb" height="200">
@@ -310,30 +305,32 @@ gpt_academic开发者QQ群-2：610599535
 
 - 已知问题
     - 某些浏览器翻译插件干扰此软件前端的运行
-    - 官方Gradio目前有很多兼容性Bug，请务必使用requirement.txt安装Gradio
+    - 官方Gradio目前有很多兼容性Bug，请务必使用`requirement.txt`安装Gradio
 
 ## 参考与学习
 
 ```
-代码中参考了很多其他优秀项目中的设计，主要包括：
+代码中参考了很多其他优秀项目中的设计，顺序不分先后：
 
-# 项目1：清华ChatGLM-6B:
+# 清华ChatGLM-6B:
 https://github.com/THUDM/ChatGLM-6B
 
-# 项目2：清华JittorLLMs:
+# 清华JittorLLMs:
 https://github.com/Jittor/JittorLLMs
 
-# 项目3：Edge-GPT:
-https://github.com/acheong08/EdgeGPT
-
-# 项目4：ChuanhuChatGPT:
-https://github.com/GaiZhenbiao/ChuanhuChatGPT
-
-# 项目5：ChatPaper:
+# ChatPaper:
 https://github.com/kaixindelele/ChatPaper
 
-# 更多：
+# Edge-GPT:
+https://github.com/acheong08/EdgeGPT
+
+# ChuanhuChatGPT:
+https://github.com/GaiZhenbiao/ChuanhuChatGPT
+
+# Oobabooga one-click installer:
+https://github.com/oobabooga/one-click-installers
+
+# More：
 https://github.com/gradio-app/gradio
 https://github.com/fghrsh/live2d_demo
-https://github.com/oobabooga/one-click-installers
 ```

From c40ebfc21f39b995b3f0437b387d5ee561759327 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Wed, 14 Jun 2023 09:50:15 +0800
Subject: [PATCH 28/78] =?UTF-8?q?=E5=B0=86gpt-3.5-16k=E4=BD=9C=E4=B8=BA?=
 =?UTF-8?q?=E5=8A=A0=E5=85=A5=E6=94=AF=E6=8C=81=E5=88=97=E8=A1=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py                 | 2 +-
 request_llm/bridge_all.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/config.py b/config.py
index 14b089e..87e0ec9 100644
--- a/config.py
+++ b/config.py
@@ -46,7 +46,7 @@ MAX_RETRY = 2
 
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
 # P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index b6efe21..a27407c 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -83,6 +83,15 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    
+    "gpt-3.5-turbo-16k": {
+        "fn_with_ui": chatgpt_ui,
+        "fn_without_ui": chatgpt_noui,
+        "endpoint": openai_endpoint,
+        "max_token": 1024*16,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
 
     "gpt-4": {
         "fn_with_ui": chatgpt_ui,

From 8c62f21aa6b0c68bdc795f315f5d325b1384161b Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Wed, 14 Jun 2023 09:57:09 +0800
Subject: [PATCH 29/78] =?UTF-8?q?3.41=E5=A2=9E=E5=8A=A0gpt-3.5-16k?=
 =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/version b/version
index 669c708..ceb909a 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.4,
+  "version": 3.41,
   "show_feature": true,
-  "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+  "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
 }

From 73d4a1ff4b41548b6d6b5ea4c321fa2e81fe55ce Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Wed, 14 Jun 2023 10:15:47 +0800
Subject: [PATCH 30/78] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 581d3d5..39b37ea 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png)，支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
-[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持，[API2D](https://api2d.com/)接口支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧？
+[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧？
 更多LLM模型接入，支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应)，引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)，[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/)
 更多新功能展示(图像生成等) …… | 见本文档结尾处 ……
 

From ef752838b06a4898ba23ea34dd349be6a51a199e Mon Sep 17 00:00:00 2001
From: Skyzayre <120616113+Skyzayre@users.noreply.github.com>
Date: Thu, 15 Jun 2023 02:07:43 +0800
Subject: [PATCH 31/78] Update README.md

---
 README.md | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.md b/README.md
index 39b37ea..c3dd52a 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la
 >
 > 1.请注意只有**红颜色**标识的函数插件（按钮）才支持读取文件，部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR！
 >
-> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代，您也可以随时自行点击相关函数插件，调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。
+> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代，您也可以随时自行点击相关函数插件，调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。
 > 
 > 3.本项目兼容并鼓励尝试国产大语言模型chatglm和RWKV, 盘古等等。支持多个api-key共存，可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时，在输入区输入临时的`API_KEY`然后回车键提交后即可生效。
 
@@ -31,13 +31,13 @@ To translate this project to arbitary language with GPT, read and run [`multi_la
 一键中英互译 | 一键中英互译
 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释
 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键
-模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/chatgpt_academic/tree/master/crazy_functions)，插件支持[热更新](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
-[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码
+模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions)，插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)
+[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码
 [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树
 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [函数插件] 一键解读latex/pdf论文全文并生成摘要
 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [函数插件] 一键翻译或润色latex论文
 批量注释生成 | [函数插件] 一键批量生成函数注释
-Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/chatgpt_academic/blob/master/docs/README_EN.md)了吗？
+Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗？
 chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文（多线程）
 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
@@ -46,7 +46,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 ⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，迄今为止最好的论文翻译工具⭐
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png)，支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
-启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
+启动暗色gradio[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
 [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧？
 更多LLM模型接入，支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应)，引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama)，[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/)
 更多新功能展示(图像生成等) …… | 见本文档结尾处 ……
@@ -91,8 +91,8 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 
 1. 下载项目
 ```sh
-git clone https://github.com/binary-husky/chatgpt_academic.git
-cd chatgpt_academic
+git clone https://github.com/binary-husky/gpt_academic.git
+cd gpt_academic
 ```
 
 2. 配置API_KEY
@@ -150,8 +150,8 @@ python main.py
 1. 仅ChatGPT（推荐大多数人选择）
 
 ``` sh
-git clone https://github.com/binary-husky/chatgpt_academic.git  # 下载项目
-cd chatgpt_academic                                 # 进入路径
+git clone https://github.com/binary-husky/gpt_academic.git  # 下载项目
+cd gpt_academic                                 # 进入路径
 nano config.py                                      # 用任意文本编辑器编辑config.py, 配置 “Proxy”， “API_KEY” 以及 “WEB_PORT” (例如50923) 等
 docker build -t gpt-academic .                      # 安装
 
@@ -188,10 +188,10 @@ docker-compose up
 按照`config.py`中的说明配置API_URL_REDIRECT即可。
 
 4. 远程云服务器部署（需要云服务器知识与经验）。
-请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
+请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
 
 5. 使用WSL2（Windows Subsystem for Linux 子系统）。
-请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
+请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
 
 6. 如何在二级网址（如`http://localhost/subpath`）下运行。
 请访问[FastAPI运行说明](docs/WithFastapi.md)
@@ -220,7 +220,7 @@ docker-compose up
 
 编写强大的函数插件来执行任何你想得到的和想不到的任务。
 本项目的插件编写、调试难度很低，只要您具备一定的python基础知识，就可以仿照我们提供的模板实现自己的插件功能。
-详情请参考[函数插件指南](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。
+详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。
 
 ---
 # Latest Update

From 6d849eeb121b9f88821d7e46cad95c32bb9a12a9 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 16 Jun 2023 17:33:03 +0800
Subject: [PATCH 32/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLangchain=E6=8F=92?=
 =?UTF-8?q?=E4=BB=B6=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/Langchain知识库.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py
index 5b09d3b..31c459a 100644
--- a/crazy_functions/Langchain知识库.py
+++ b/crazy_functions/Langchain知识库.py
@@ -30,7 +30,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
         )
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         from .crazy_utils import try_install_deps
-        try_install_deps(['zh_langchain==0.2.0'])
+        try_install_deps(['zh_langchain==0.2.1'])
     
     # < --------------------读取参数--------------- >
     if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
@@ -84,7 +84,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
         chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装，请查看终端的输出或耐心等待..."])
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         from .crazy_utils import try_install_deps
-        try_install_deps(['zh_langchain==0.2.0'])
+        try_install_deps(['zh_langchain==0.2.1'])
 
     # < -------------------  --------------- >
     kai = knowledge_archive_interface()

From bb864c631376320f8847b36e4a75a38edbb23176 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 16 Jun 2023 17:33:19 +0800
Subject: [PATCH 33/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=B8=80=E4=BA=9B?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA=E6=96=87=E5=AD=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 53894ca..78eec29 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -165,17 +165,23 @@ def merge_tex_files(project_foler, main_file, mode):
     main_file = rm_comments(main_file)
 
     if mode == 'translate_zh':
+        # find paper documentclass
         pattern = re.compile(r'\\documentclass.*\n')
         match = pattern.search(main_file)
+        assert match is not None, "Cannot find documentclass statement!"
         position = match.end()
         add_ctex = '\\usepackage{ctex}\n'
         add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
         main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
-        # 2 fontset=windows
+        # fontset=windows
         import platform
         if platform.system() != 'Windows':
             main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
             main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
+        # find paper abstract
+        pattern = re.compile(r'\\begin\{abstract\}.*\n')
+        match = pattern.search(main_file)
+        assert match is not None, "Cannot find paper abstract section!"
     return main_file
 
 
@@ -418,6 +424,7 @@ class LatexPaperSplit():
         if mode == 'translate_zh':
             pattern = re.compile(r'\\begin\{abstract\}.*\n')
             match = pattern.search(result_string)
+            assert match is not None, "Cannot find paper abstract section!"
             position = match.end()
             result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
         return result_string

From 9a5a509dd9c85949a89a7ef763572dca92afeb46 Mon Sep 17 00:00:00 2001
From: OverKit <room@scnumath.eu.org>
Date: Sat, 17 Jun 2023 19:27:21 +0800
Subject: [PATCH 34/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=85=B3=E4=BA=8Eabstr?=
 =?UTF-8?q?act=E7=9A=84=E6=90=9C=E7=B4=A2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 78eec29..3734f00 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -424,7 +424,9 @@ class LatexPaperSplit():
         if mode == 'translate_zh':
             pattern = re.compile(r'\\begin\{abstract\}.*\n')
             match = pattern.search(result_string)
-            assert match is not None, "Cannot find paper abstract section!"
+            if not match:
+                pattern = re.compile(r'\\abstract\{')
+                match = pattern.search(result_string)
             position = match.end()
             result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
         return result_string

From 2bb13b4677b3bd403b950c97036a2753cda5ec8b Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 15:44:42 +0800
Subject: [PATCH 35/78] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c3dd52a..f079086 100644
--- a/README.md
+++ b/README.md
@@ -160,6 +160,7 @@ docker run --rm -it --net=host gpt-academic
 #（最后一步-选择2）在macOS/windows环境下，只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
 ```
+P.S. 如果需要Latex功能，请见另一个[Dockerfile](https://github.com/binary-husky/gpt_academic/blob/master/docs/Dockerfile%2BNoLocal%2BLatex)
 
 2. ChatGPT + ChatGLM + MOSS（需要熟悉Docker）
 

From 7fdf0a8e51ee7acfcb2822d07a6c3ed1e8c52846 Mon Sep 17 00:00:00 2001
From: OverKit <room@scnumath.eu.org>
Date: Sun, 18 Jun 2023 15:51:29 +0800
Subject: [PATCH 36/78] =?UTF-8?q?=E8=B0=83=E6=95=B4=E5=8C=BA=E5=88=86?=
 =?UTF-8?q?=E5=86=85=E5=AE=B9=E7=9A=84=E4=BB=A3=E7=A0=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 81 +++++++++++++++++++++++++---------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 3734f00..eebce80 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -23,38 +23,67 @@ def split_worker(text, mask, pattern, flags=0):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
-def split_worker_careful_brace(text, mask, pattern, flags=0):
+def set_transform_area(text, mask, pattern, flags=0):
     """
-    Move area into preserve area 
+    Add a transform text area in this paper
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
-        brace_level = -1
-        p = begin = end = res.regs[0][0]
-        for _ in range(1024*16):
-            if text[p] == '}' and brace_level == 0: break
-            elif text[p] == '}':  brace_level -= 1
-            elif text[p] == '{':  brace_level += 1
-            p += 1
-        end = p+1
-        mask[begin:end] = PRESERVE
+        mask[res.span()[0] : res.span()[1]] = TRANSFORM
     return text, mask
 
+
+def split_worker_careful_brace(text, mask, pattern, flags=0):
+    """
+    Move area into preserve area.
+    It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
+    """
+    pattern_compile = re.compile(pattern, flags)
+    res = pattern_compile.search(text)
+
+    # 确保捕获组存在
+    if res and len(res.regs) > 1:
+        brace_level = 0
+        p = begin = end = res.regs[1][0]
+        for _ in range(1024 * 16):
+            if text[p] == "}" and brace_level == 1:
+                break
+            elif text[p] == "}":
+                brace_level -= 1
+            elif text[p] == "{":
+                brace_level += 1
+            p += 1
+        end = p
+        mask[begin + 1 : end] = PRESERVE
+        split_worker_careful_brace(text[end:], mask[end:], pattern, flags=flags)
+
+    return text, mask
+
+
 def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
     """
-    Move area out of preserve area 
+    Move area out of preserve area.
+    It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
     """
     pattern_compile = re.compile(pattern, flags)
-    for res in pattern_compile.finditer(text):
+    res = pattern_compile.search(text)
+
+    # 确保捕获组存在
+    if res and len(res.regs) > 1:
         brace_level = 0
         p = begin = end = res.regs[1][0]
-        for _ in range(1024*16):
-            if text[p] == '}' and brace_level == 0: break
-            elif text[p] == '}':  brace_level -= 1
-            elif text[p] == '{':  brace_level += 1
+        for _ in range(1024 * 16):
+            if text[p] == "}" and brace_level == 1:
+                break
+            elif text[p] == "}":
+                brace_level -= 1
+            elif text[p] == "{":
+                brace_level += 1
             p += 1
         end = p
-        mask[begin:end] = TRANSFORM
+        mask[begin + 1 : end] = TRANSFORM
+        split_worker_reverse_careful_brace(text[end:], mask[end:], pattern, flags=flags)
+
     return text, mask
 
 def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
@@ -260,13 +289,14 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
 
     # 吸收title与作者以上的部分
-    text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL)
+    text, mask = split_worker(text, mask, r".*?\\begin\{document\}", re.DOTALL)
     # 删除iffalse注释
     text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
     # 吸收在25行以内的begin-end组合
     text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
     # 吸收匿名公式
     text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\\[.*?\\\]", re.DOTALL)
     # 吸收其他杂项
     text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
@@ -274,6 +304,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
+    text, mask = split_worker(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
@@ -293,12 +324,18 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\item ")
     text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
+
+    text, mask = set_transform_area(text, mask, r"\\begin\{abstract\}.*?\\end\{abstract\}", re.DOTALL)
+
+    text, mask = split_worker_careful_brace(text, mask, r"\\hl(\{.*\})", re.DOTALL)
+    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption(\{.*\})", re.DOTALL)
+    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\abstract(\{.*\})", re.DOTALL)
+
+    text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-    text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
-    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+
     root = convert_to_linklist(text, mask)
 
     # 修复括号

From 4bafbb3562f249b9b10a3595ac9f859762a52377 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 15:54:23 +0800
Subject: [PATCH 37/78] =?UTF-8?q?Update=20Latex=E8=BE=93=E5=87=BAPDF?=
 =?UTF-8?q?=E7=BB=93=E6=9E=9C.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/Latex输出PDF结果.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 2e9a30b..6c89751 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -205,7 +205,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     # <-------------- information about this plugin ------------->
     chatbot.append([
         "函数插件功能？",
-        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。"])
+        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳，Linux下必须使用Docker安装，详见项目主README.md。目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
 

From 8d7ee975a012a9b258408ddcf9a59ea4a29a752d Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 16:10:45 +0800
Subject: [PATCH 38/78] Update README.md

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f079086..182a49b 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,8 @@ conda activate gptac_venv                 # 激活anaconda环境
 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
 ```
 
+P.S. 如果需要依赖Latex的插件功能，请见Wiki
+
 <details><summary>如果需要支持清华ChatGLM/复旦MOSS作为后端，请点击展开此处</summary>
 <p>
 
@@ -160,7 +162,7 @@ docker run --rm -it --net=host gpt-academic
 #（最后一步-选择2）在macOS/windows环境下，只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
 ```
-P.S. 如果需要Latex功能，请见另一个[Dockerfile](https://github.com/binary-husky/gpt_academic/blob/master/docs/Dockerfile%2BNoLocal%2BLatex)
+P.S. 如果需要依赖Latex的插件功能，请见Wiki
 
 2. ChatGPT + ChatGLM + MOSS（需要熟悉Docker）
 

From 29c6bfb6cb08f58a0e5fba8540ef56cf36277cf6 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 16:12:06 +0800
Subject: [PATCH 39/78] Update README.md

---
 README.md | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 182a49b..6e461c2 100644
--- a/README.md
+++ b/README.md
@@ -113,11 +113,16 @@ conda activate gptac_venv                 # 激活anaconda环境
 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
 ```
 
-P.S. 如果需要依赖Latex的插件功能，请见Wiki
-
 <details><summary>如果需要支持清华ChatGLM/复旦MOSS作为后端，请点击展开此处</summary>
 <p>
 
+
+<details><summary>如果需要依赖Latex的插件功能（如Arxiv文献翻译），请点击展开此处</summary>
+<p>
+ 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89)
+</p>
+</details>
+
 【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端，需要额外安装更多依赖（前提条件：熟悉Python + 用过Pytorch + 电脑配置够强）：
 ```sh
 # 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注：如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误，参考如下： 1：以上默认安装的为torch+cpu版，使用cuda需要卸载torch重新安装torch+cuda； 2：如因本机配置不够无法加载模型，可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)

From a06e43c96b9f6c199b0d440d5db2e7247224a18b Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 16:15:37 +0800
Subject: [PATCH 40/78] Update README.md

---
 README.md | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 6e461c2..c69bfb5 100644
--- a/README.md
+++ b/README.md
@@ -113,16 +113,10 @@ conda activate gptac_venv                 # 激活anaconda环境
 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
 ```
 
+
 <details><summary>如果需要支持清华ChatGLM/复旦MOSS作为后端，请点击展开此处</summary>
 <p>
 
-
-<details><summary>如果需要依赖Latex的插件功能（如Arxiv文献翻译），请点击展开此处</summary>
-<p>
- 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89)
-</p>
-</details>
-
 【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端，需要额外安装更多依赖（前提条件：熟悉Python + 用过Pytorch + 电脑配置够强）：
 ```sh
 # 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注：如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误，参考如下： 1：以上默认安装的为torch+cpu版，使用cuda需要卸载torch重新安装torch+cuda； 2：如因本机配置不够无法加载模型，可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)

From d5bab093f94523665c5b0a6b7781dd491123faff Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Mon, 19 Jun 2023 15:17:33 +1000
Subject: [PATCH 41/78] rename function names

---
 crazy_functions/latex_utils.py | 163 ++++++++++++++-------------------
 1 file changed, 69 insertions(+), 94 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index eebce80..a984b2f 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -8,85 +8,65 @@ pj = os.path.join
 """
 ========================================================================
 Part One
-Latex segmentation to a linklist
+Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
 ========================================================================
 """
 PRESERVE = 0
 TRANSFORM = 1
 
-def split_worker(text, mask, pattern, flags=0):
+def set_forbidden_text(text, mask, pattern, flags=0):
     """
     Add a preserve text area in this paper
+    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
+    you can mask out (mask = PRESERVE so that text becomes untouchable for GPT)
+    everything between "\begin{algorithm}" and "\end{algorithm}"
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
-def set_transform_area(text, mask, pattern, flags=0):
+def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
     """
-    Add a transform text area in this paper
+    Add a preserve text area in this paper (text becomes untouchable for GPT).
+    count the number of the braces so as to catch complete text area.
+    e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
-        mask[res.span()[0] : res.span()[1]] = TRANSFORM
+        brace_level = -1
+        p = begin = end = res.regs[0][0]
+        for _ in range(1024*16):
+            if text[p] == '}' and brace_level == 0: break
+            elif text[p] == '}':  brace_level -= 1
+            elif text[p] == '{':  brace_level += 1
+            p += 1
+        end = p+1
+        mask[begin:end] = PRESERVE
     return text, mask
 
-
-def split_worker_careful_brace(text, mask, pattern, flags=0):
+def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
     """
-    Move area into preserve area.
-    It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
+    Move area out of preserve area (make text editable for GPT)
+    count the number of the braces so as to catch complete text area.
+    e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
     """
     pattern_compile = re.compile(pattern, flags)
-    res = pattern_compile.search(text)
-
-    # 确保捕获组存在
-    if res and len(res.regs) > 1:
+    for res in pattern_compile.finditer(text):
         brace_level = 0
         p = begin = end = res.regs[1][0]
-        for _ in range(1024 * 16):
-            if text[p] == "}" and brace_level == 1:
-                break
-            elif text[p] == "}":
-                brace_level -= 1
-            elif text[p] == "{":
-                brace_level += 1
+        for _ in range(1024*16):
+            if text[p] == '}' and brace_level == 0: break
+            elif text[p] == '}':  brace_level -= 1
+            elif text[p] == '{':  brace_level += 1
             p += 1
         end = p
-        mask[begin + 1 : end] = PRESERVE
-        split_worker_careful_brace(text[end:], mask[end:], pattern, flags=flags)
-
+        mask[begin:end] = TRANSFORM
     return text, mask
 
-
-def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
-    """
-    Move area out of preserve area.
-    It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
-    """
-    pattern_compile = re.compile(pattern, flags)
-    res = pattern_compile.search(text)
-
-    # 确保捕获组存在
-    if res and len(res.regs) > 1:
-        brace_level = 0
-        p = begin = end = res.regs[1][0]
-        for _ in range(1024 * 16):
-            if text[p] == "}" and brace_level == 1:
-                break
-            elif text[p] == "}":
-                brace_level -= 1
-            elif text[p] == "{":
-                brace_level += 1
-            p += 1
-        end = p
-        mask[begin + 1 : end] = TRANSFORM
-        split_worker_reverse_careful_brace(text[end:], mask[end:], pattern, flags=flags)
-
-    return text, mask
-
-def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
+def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
     """
     Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
     Add it to preserve area
@@ -289,53 +269,48 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
 
     # 吸收title与作者以上的部分
-    text, mask = split_worker(text, mask, r".*?\\begin\{document\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
     # 删除iffalse注释
-    text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
     # 吸收在25行以内的begin-end组合
-    text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
+    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
     # 吸收匿名公式
-    text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\\[.*?\\\]", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\\[.*?\\\]", re.DOTALL)
     # 吸收其他杂项
-    text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
-    text, mask = split_worker(text, mask, r"\\item ")
-    text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
-
-    text, mask = set_transform_area(text, mask, r"\\begin\{abstract\}.*?\\end\{abstract\}", re.DOTALL)
-
-    text, mask = split_worker_careful_brace(text, mask, r"\\hl(\{.*\})", re.DOTALL)
-    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption(\{.*\})", re.DOTALL)
-    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\abstract(\{.*\})", re.DOTALL)
-
-    text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
-    text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-
+    text, mask = set_forbidden_text(text, mask, r"\\section\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\section\*\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\subsection\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\subsubsection\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\bibliography\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\bibliographystyle\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"\\item ")
+    text, mask = set_forbidden_text(text, mask, r"\\label\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\begin\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\vspace\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\hspace\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, r"\\end\{(.*?)\}")
+    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
+    # reverse 操作必须放在最后
+    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
     root = convert_to_linklist(text, mask)
 
     # 修复括号

From af7734dd35c62de6f85a18b00c3598527b85cfb4 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Mon, 19 Jun 2023 16:57:11 +1000
Subject: [PATCH 42/78] avoid file fusion

---
 crazy_functions/latex_utils.py |  2 +-
 main.py                        |  4 ++--
 toolbox.py                     | 14 ++++++++++----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 78eec29..163d0e2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -685,7 +685,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
             if os.path.exists(pj(work_folder, '..', 'translation')):
                 shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf)
+            promote_file_to_downloadzone(result_pdf, chatbot)
             return True # 成功啦
         else:
             if n_fix>=max_try: break
diff --git a/main.py b/main.py
index 7dbf17f..65e1f4c 100644
--- a/main.py
+++ b/main.py
@@ -155,7 +155,7 @@ def main():
         for k in crazy_fns:
             if not crazy_fns[k].get("AsButton", True): continue
             click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
-            click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+            click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
             cancel_handles.append(click_handle)
         # 函数插件-下拉菜单与随变按钮的互动
         def on_dropdown_changed(k):
@@ -175,7 +175,7 @@ def main():
             if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
             yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs)
         click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
-        click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+        click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
         cancel_handles.append(click_handle)
         # 终止按钮的回调函数注册
         stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
diff --git a/toolbox.py b/toolbox.py
index 4ab1116..ac49afc 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -439,13 +439,15 @@ def find_recent_files(directory):
 
     return recent_files
 
-def promote_file_to_downloadzone(file, rename_file=None):
+def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     # 将文件复制一份到下载区
     import shutil
     if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
     new_path = os.path.join(f'./gpt_log/', rename_file)
     if os.path.exists(new_path): os.remove(new_path)
     shutil.copyfile(file, new_path)
+    if chatbot:
+        chatbot._cookies.update({'file_to_promote': [new_path]})
 
 def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
     """
@@ -485,16 +487,20 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
     return chatbot, txt, txt2
 
 
-def on_report_generated(files, chatbot):
+def on_report_generated(cookies, files, chatbot):
     from toolbox import find_recent_files
-    report_files = find_recent_files('gpt_log')
+    if 'file_to_promote' in cookies:
+        report_files = cookies['file_to_promote']
+        cookies.pop('file_to_promote')
+    else:
+        report_files = find_recent_files('gpt_log')
     if len(report_files) == 0:
         return None, chatbot
     # files.extend(report_files)
     file_links = ''
     for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
     chatbot.append(['报告如何远程获取？', f'报告已经添加到右侧“文件上传区”（可能处于折叠状态），请查收。{file_links}'])
-    return report_files, chatbot
+    return cookies, report_files, chatbot
 
 def is_openai_api_key(key):
     API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)

From f3e4e26e2f095e1f0d3b5faeaec23fde2b0b33a0 Mon Sep 17 00:00:00 2001
From: dackdawn <whileangel@outlook.com>
Date: Mon, 19 Jun 2023 21:40:26 +0800
Subject: [PATCH 43/78] =?UTF-8?q?=E6=B7=BB=E5=8A=A00613=E6=A8=A1=E5=9E=8B?=
 =?UTF-8?q?=E7=9A=84=E5=A3=B0=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

openai对gpt-3.5-turbo的RPM限制是3，而gpt-3.5-turbo-0613的RPM是60，虽然两个模型的内容是一致的，但是选定特定模型可以获得更高的RPM和TPM
---
 config.py                 |  2 +-
 request_llm/bridge_all.py | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/config.py b/config.py
index 87e0ec9..917c268 100644
--- a/config.py
+++ b/config.py
@@ -46,7 +46,7 @@ MAX_RETRY = 2
 
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0613", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
 # P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index a27407c..22fa04b 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -93,6 +93,24 @@ model_info = {
         "token_cnt": get_token_num_gpt35,
     },
 
+    "gpt-3.5-turbo-0613": {
+        "fn_with_ui": chatgpt_ui,
+        "fn_without_ui": chatgpt_noui,
+        "endpoint": openai_endpoint,
+        "max_token": 4096,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+
+    "gpt-3.5-turbo-16k-0613": {
+        "fn_with_ui": chatgpt_ui,
+        "fn_without_ui": chatgpt_noui,
+        "endpoint": openai_endpoint,
+        "max_token": 1024 * 16,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+
     "gpt-4": {
         "fn_with_ui": chatgpt_ui,
         "fn_without_ui": chatgpt_noui,

From 5da633d94dfa13c7658956537bc7c6c0d37e8a73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lebenito=EF=BC=88=E7=94=9F=E7=B3=B8=EF=BC=89?=
 <i@lebenito.net>
Date: Tue, 20 Jun 2023 19:10:11 +0800
Subject: [PATCH 44/78] Update README.md

Fix the incorrect URL in the git clone command.
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index c69bfb5..7976076 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 
 1. 下载项目
 ```sh
-git clone https://github.com/binary-husky/.git
+git clone https://github.com/binary-husky/gpt_academic.git
 cd gpt_academic
 ```
 

From 61eb0da861526ccee760caba86ffca387d9af358 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Tue, 20 Jun 2023 22:08:09 +1000
Subject: [PATCH 45/78] fix encoding bug

---
 crazy_functions/latex_utils.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 163d0e2..308044f 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -175,9 +175,8 @@ def merge_tex_files(project_foler, main_file, mode):
         main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
         # fontset=windows
         import platform
-        if platform.system() != 'Windows':
-            main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
-            main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
+        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
+        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
         # find paper abstract
         pattern = re.compile(r'\\begin\{abstract\}.*\n')
         match = pattern.search(main_file)

From bf955aaf12e94674877ca61d02d197547ae05cee Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Tue, 20 Jun 2023 23:12:30 +1000
Subject: [PATCH 46/78] fix bugs

---
 crazy_functional.py                 | 45 +++++++++++++++++++----------
 crazy_functions/Latex输出PDF结果.py | 23 ++++++++++-----
 crazy_functions/latex_utils.py      | 24 ++++++++-------
 crazy_functions/对话历史存档.py     |  7 ++---
 toolbox.py                          |  4 +--
 5 files changed, 63 insertions(+), 40 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index 2f0fbaa..6ad2dc8 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -358,29 +358,42 @@ def get_crazy_functions():
         })
         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
         function_plugins.update({
-            "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
+            "Arixv翻译（输入arxivID） [需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
-                # "AdvancedArgs": True,
-                # "ArgsReminder": "",
+                "AdvancedArgs": True,
+                "ArgsReminder": 
+                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
+                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                "Function": HotReload(Latex翻译中文并重新编译PDF)
+            }
+        })
+        function_plugins.update({
+            "本地论文翻译（上传Latex压缩包） [需Latex]": {
+                "Color": "stop",
+                "AsButton": False,
+                "AdvancedArgs": True,
+                "ArgsReminder": 
+                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
+                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
                 "Function": HotReload(Latex翻译中文并重新编译PDF)
             }
         })
     except:
         print('Load function plugin failed')
 
-    try:
-        from crazy_functions.虚空终端 import 终端
-        function_plugins.update({
-            "超级终端": {
-                "Color": "stop",
-                "AsButton": False,
-                # "AdvancedArgs": True,
-                # "ArgsReminder": "",
-                "Function": HotReload(终端)
-            }
-        })
-    except:
-        print('Load function plugin failed')
+    # try:
+    #     from crazy_functions.虚空终端 import 终端
+    #     function_plugins.update({
+    #         "超级终端": {
+    #             "Color": "stop",
+    #             "AsButton": False,
+    #             # "AdvancedArgs": True,
+    #             # "ArgsReminder": "",
+    #             "Function": HotReload(终端)
+    #         }
+    #     })
+    # except:
+    #     print('Load function plugin failed')
 
     return function_plugins
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 6c89751..214b00a 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -1,12 +1,13 @@
 from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
 from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
+from functools import partial
 import glob, os, requests, time
 pj = os.path.join
 ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
 
 # =================================== 工具函数 ===============================================
 专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
-def switch_prompt(pfg, mode):
+def switch_prompt(pfg, mode, more_requirement):
     """
     Generate prompts and system prompts based on the mode for proofreading or translating.
     Args:
@@ -25,7 +26,7 @@ def switch_prompt(pfg, mode):
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]
         sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
     elif mode == 'translate_zh':
-        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + 
+        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement + 
                         r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + 
                         r"Answer me only with the translated text:" + 
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]
@@ -79,7 +80,7 @@ def arxiv_download(chatbot, history, txt):
             os.makedirs(translation_dir)
         target_file = pj(translation_dir, 'translate_zh.pdf')
         if os.path.exists(target_file):
-            promote_file_to_downloadzone(target_file)
+            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
             return target_file
         return False
     def is_float(s):
@@ -88,8 +89,10 @@ def arxiv_download(chatbot, history, txt):
             return True
         except ValueError:
             return False
-    if ('.' in txt) and ('/' not in txt) and is_float(txt):
+    if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
         txt = 'https://arxiv.org/abs/' + txt
+    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
+        txt = 'https://arxiv.org/abs/' + txt[:10]
     if not txt.startswith('https://arxiv.org'): 
         return txt, None
     
@@ -177,7 +180,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
 
     # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
     if not os.path.exists(project_folder + '/merge_proofread.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
+                                chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
 
 
     # <-------------- compile PDF ------------->
@@ -208,6 +212,10 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
         "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳，Linux下必须使用Docker安装，详见项目主README.md。目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
+    # <-------------- more requirements ------------->
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    more_req = plugin_kwargs.get("advanced_arg", "")
+    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
 
     # <-------------- check deps ------------->
     try:
@@ -255,11 +263,12 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
 
     # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
     if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
+                                chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)
 
 
     # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', 
+    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', 
                              work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
 
     # <-------------- zip PDF ------------->
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 308044f..58ac413 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -403,7 +403,7 @@ class LatexPaperSplit():
     def __init__(self) -> None:
         self.nodes = None
         self.msg = "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
-            "版权归原文作者所有。翻译内容可靠性无任何保障，请仔细鉴别并以原文为准。" + \
+            "版权归原文作者所有。翻译内容可靠性无保障，请仔细鉴别并以原文为准。" + \
             "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
         # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）
         self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响，禁止移除或修改此警告。}}\\\\" 
@@ -623,7 +623,7 @@ def compile_latex_with_timeout(command, timeout=60):
         return False
     return True
 
-def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
+def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
     import os, time
     current_dir = os.getcwd()
     n_fix = 1
@@ -634,6 +634,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 
     while True:
         import os
+
         # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
         os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
@@ -655,15 +656,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
             os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
 
-            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
-            print(    f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
-            ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+            if mode!='translate_zh':
+                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
+                print(    f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+                ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
 
-            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
 
         # <--------------------->
         os.chdir(current_dir)
@@ -684,7 +686,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
             if os.path.exists(pj(work_folder, '..', 'translation')):
                 shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf, chatbot)
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
             return True # 成功啦
         else:
             if n_fix>=max_try: break
diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py
index c638d1b..fed0f8f 100644
--- a/crazy_functions/对话历史存档.py
+++ b/crazy_functions/对话历史存档.py
@@ -1,4 +1,4 @@
-from toolbox import CatchException, update_ui
+from toolbox import CatchException, update_ui, promote_file_to_downloadzone
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 import re
 
@@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
         for h in history:
             f.write("\n>>>" + h)
         f.write('</code>')
-    res = '对话历史写入：' + os.path.abspath(f'./gpt_log/{file_name}')
-    print(res)
-    return res
+    promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
+    return '对话历史写入：' + os.path.abspath(f'./gpt_log/{file_name}')
 
 def gen_file_preview(file_name):
     try:
diff --git a/toolbox.py b/toolbox.py
index ac49afc..ff936d6 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -444,8 +444,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     import shutil
     if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
     new_path = os.path.join(f'./gpt_log/', rename_file)
-    if os.path.exists(new_path): os.remove(new_path)
-    shutil.copyfile(file, new_path)
+    if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
+    if not os.path.exists(new_path): shutil.copyfile(file, new_path)
     if chatbot:
         chatbot._cookies.update({'file_to_promote': [new_path]})
 

From cb0bb6ab4a9b458118435220086bb60cea238416 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 00:41:33 +1000
Subject: [PATCH 47/78] fix minor bugs

---
 crazy_functional.py                 | 22 ++++++-------
 crazy_functions/Latex输出PDF结果.py |  7 +++--
 crazy_functions/crazy_utils.py      | 48 +++++++++++++++++++++++++++++
 crazy_functions/latex_utils.py      | 26 ++++++++++++++++
 toolbox.py                          | 12 +++++---
 5 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index 6ad2dc8..ded0698 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -368,17 +368,17 @@ def get_crazy_functions():
                 "Function": HotReload(Latex翻译中文并重新编译PDF)
             }
         })
-        function_plugins.update({
-            "本地论文翻译（上传Latex压缩包） [需Latex]": {
-                "Color": "stop",
-                "AsButton": False,
-                "AdvancedArgs": True,
-                "ArgsReminder": 
-                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
-                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
-                "Function": HotReload(Latex翻译中文并重新编译PDF)
-            }
-        })
+        # function_plugins.update({
+        #     "本地论文翻译（上传Latex压缩包） [需Latex]": {
+        #         "Color": "stop",
+        #         "AsButton": False,
+        #         "AdvancedArgs": True,
+        #         "ArgsReminder": 
+        #             "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
+        #             "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+        #         "Function": HotReload(Latex翻译中文并重新编译PDF)
+        #     }
+        # })
     except:
         print('Load function plugin failed')
 
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 214b00a..4f19967 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -108,6 +108,7 @@ def arxiv_download(chatbot, history, txt):
         return msg, None
     # <-------------- set format ------------->
     arxiv_id = url_.split('/abs/')[-1]
+    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
     cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
     if cached_translation_pdf: return cached_translation_pdf, arxiv_id
 
@@ -190,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     
 
     # <-------------- zip PDF ------------->
-    zip_result(project_folder)
+    zip_res = zip_result(project_folder)
     if success:
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
 
     # <-------------- we are done ------------->
     return success
@@ -272,13 +274,14 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
                              work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
 
     # <-------------- zip PDF ------------->
-    zip_result(project_folder)
+    zip_res = zip_result(project_folder)
     if success:
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
 
 
     # <-------------- we are done ------------->
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 96301ff..a1b1493 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -698,3 +698,51 @@ def try_install_deps(deps):
     for dep in deps:
         import subprocess, sys
         subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
+
+
+class construct_html():
+    def __init__(self) -> None:
+        self.css = """
+.row {
+  display: flex;
+  flex-wrap: wrap;
+}
+
+.column {
+  flex: 1;
+  padding: 10px;
+}
+
+.table-header {
+  font-weight: bold;
+  border-bottom: 1px solid black;
+}
+
+.table-row {
+  border-bottom: 1px solid lightgray;
+}
+
+.table-cell {
+  padding: 5px;
+}
+        """
+        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
+
+
+    def add_row(self, a, b):
+        tmp = """
+<div class="row table-row">
+    <div class="column table-cell">REPLACE_A</div>
+    <div class="column table-cell">REPLACE_B</div>
+</div>
+        """
+        from toolbox import markdown_convertion
+        tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
+        tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
+        self.html_string += tmp
+
+
+    def save_file(self, file_name):
+        with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+            f.write(self.html_string.encode('utf-8', 'ignore').decode())
+
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 58ac413..a1e7758 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -497,7 +497,32 @@ class LatexPaperFileGroup():
                 f.write(res)
         return manifest
 
+def write_html(sp_file_contents, sp_file_result, chatbot):
 
+    # write html
+    try:
+        import copy
+        from .crazy_utils import construct_html
+        from toolbox import gen_time_str
+        ch = construct_html() 
+        orig = ""
+        trans = ""
+        final = []
+        for c,r in zip(sp_file_contents, sp_file_result): 
+            final.append(c)
+            final.append(r)
+        for i, k in enumerate(final): 
+            if i%2==0:
+                orig = k
+            if i%2==1:
+                trans = k
+                ch.add_row(a=orig, b=trans)
+        create_report_file_name = f"{gen_time_str()}.trans.html"
+        ch.save_file(create_report_file_name)
+        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
+    except:
+        from toolbox import trimmed_format_exc
+        print('writing html result failed:', trimmed_format_exc())
 
 def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
     import time, os, re
@@ -574,6 +599,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
         pfg.get_token_num = None
         objdump(pfg, file=pj(project_folder,'temp.pkl'))
 
+    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
 
     #  <-------- 写出文件 ----------> 
     msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
diff --git a/toolbox.py b/toolbox.py
index ff936d6..fb6aa9f 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -6,6 +6,7 @@ import re
 import os
 from latex2mathml.converter import convert as tex2mathml
 from functools import wraps, lru_cache
+pj = os.path.join
 
 """
 ========================================================================
@@ -399,7 +400,7 @@ def extract_archive(file_path, dest_dir):
                 print("Successfully extracted rar archive to {}".format(dest_dir))
         except:
             print("Rar format requires additional dependencies to install")
-            return '\n\n需要安装pip install rarfile来解压rar文件'
+            return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件'
 
     # 第三方库，需要预先pip install py7zr
     elif file_extension == '.7z':
@@ -410,7 +411,7 @@ def extract_archive(file_path, dest_dir):
                 print("Successfully extracted 7z archive to {}".format(dest_dir))
         except:
             print("7z format requires additional dependencies to install")
-            return '\n\n需要安装pip install py7zr来解压7z文件'
+            return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件'
     else:
         return ''
     return ''
@@ -447,7 +448,9 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
     if not os.path.exists(new_path): shutil.copyfile(file, new_path)
     if chatbot:
-        chatbot._cookies.update({'file_to_promote': [new_path]})
+        if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote']
+        else: current = []
+        chatbot._cookies.update({'file_to_promote': [new_path] + current})
 
 def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
     """
@@ -802,7 +805,8 @@ def zip_result(folder):
     import time
     t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
     zip_folder(folder, './gpt_log/', f'{t}-result.zip')
-    
+    return pj('./gpt_log/', f'{t}-result.zip')
+
 def gen_time_str():
     import time
     return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

From d7b056576d51945808dcb99733ec7931aedad5be Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 00:52:58 +1000
Subject: [PATCH 48/78] add latex docker-compose

---
 docs/GithubAction+NoLocal+Latex | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 docs/GithubAction+NoLocal+Latex

diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex
new file mode 100644
index 0000000..5ff9bb8
--- /dev/null
+++ b/docs/GithubAction+NoLocal+Latex
@@ -0,0 +1,25 @@
+# 此Dockerfile适用于“无本地模型”的环境构建，如果需要使用chatglm等本地模型，请参考 docs/Dockerfile+ChatGLM
+# - 1 修改 `config.py`
+# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/GithubAction+NoLocal+Latex .
+# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
+
+FROM fuqingxu/python311_texlive_ctex:latest
+
+# 指定路径
+WORKDIR /gpt
+
+RUN pip3 install gradio openai numpy arxiv rich
+RUN pip3 install colorama Markdown pygments pymupdf
+
+# 装载项目文件
+COPY . .
+
+
+# 安装依赖
+RUN pip3 install -r requirements.txt
+
+# 可选步骤，用于预热模块
+RUN python3  -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# 启动
+CMD ["python3", "-u", "main.py"]

From 22a65cd1637e0d690c7db0326ddb2f5f312c0764 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Wed, 21 Jun 2023 00:55:24 +1000
Subject: [PATCH 49/78] Create build-with-latex.yml

---
 .github/workflows/build-with-latex.yml | 44 ++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 .github/workflows/build-with-latex.yml

diff --git a/.github/workflows/build-with-latex.yml b/.github/workflows/build-with-latex.yml
new file mode 100644
index 0000000..fb16d2c
--- /dev/null
+++ b/.github/workflows/build-with-latex.yml
@@ -0,0 +1,44 @@
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
+name: Create and publish a Docker image for Latex support
+
+on:
+  push:
+    branches:
+      - 'master'
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}_with_latex
+
+jobs:
+  build-and-push-image:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v2
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          file: docs/GithubAction+NoLocal+Latex
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}

From 1fede6df7fc182a355fac65fc4487e1b579d7be7 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Tue, 20 Jun 2023 23:05:17 +0800
Subject: [PATCH 50/78] temp

---
 crazy_functional.py                 | 5 +++--
 crazy_functions/Latex输出PDF结果.py | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index d8ca9ae..abd44d7 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -119,6 +119,7 @@ def get_crazy_functions():
         },
         "[插件demo] 历史上的今天": {
             # HotReload 的意思是热更新，修改函数插件代码后，不需要重启程序，代码直接生效
+            "AsButton": False,  # 加入下拉菜单中
             "Function": HotReload(高阶功能模板函数)
         },
 
@@ -358,9 +359,9 @@ def get_crazy_functions():
         })
         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
         function_plugins.update({
-            "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
+            "Arixv论文精细翻译": {
                 "Color": "stop",
-                "AsButton": False,
+                "AsButton": True,
                 # "AdvancedArgs": True,
                 # "ArgsReminder": "",
                 "Function": HotReload(Latex翻译中文并重新编译PDF)
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 2e9a30b..1d5e103 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -89,7 +89,7 @@ def arxiv_download(chatbot, history, txt):
         except ValueError:
             return False
     if ('.' in txt) and ('/' not in txt) and is_float(txt):
-        txt = 'https://arxiv.org/abs/' + txt
+        txt = 'https://arxiv.org/abs/' + txt.strip()
     if not txt.startswith('https://arxiv.org'): 
         return txt, None
     

From cf5f348d704cfadaeb7c86bdf43bfdc219f68a47 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:20:31 +1000
Subject: [PATCH 51/78] update test samples

---
 crazy_functions/crazy_functions_test.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index e743878..b4ff5e2 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -189,6 +189,7 @@ def test_Latex():
     # txt = r"https://arxiv.org/abs/2211.16068"                     #  ACE
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
     txt = r"https://arxiv.org/abs/2002.09253"
+    txt = r"https://arxiv.org/abs/2306.07831"
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
         cli_printer.print(cb)   #  print(cb)
 
@@ -217,6 +218,7 @@ def test_Latex():
 # test_数学动画生成manim()
 # test_Langchain知识库()
 # test_Langchain知识库读取()
-test_Latex()
-input("程序完成，回车退出。")
-print("退出。")
\ No newline at end of file
+if __name__ == "__main__":
+    test_Latex()
+    input("程序完成，回车退出。")
+    print("退出。")
\ No newline at end of file

From d87f1eb17133a31707152f84d37cf6e9d2e4e5dc Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:38:59 +1000
Subject: [PATCH 52/78] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A5=E5=85=A5azure?=
 =?UTF-8?q?=E7=9A=84=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md         |  11 ++--
 config.py         |  13 +++--
 docs/use_azure.md | 143 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 157 insertions(+), 10 deletions(-)
 create mode 100644 docs/use_azure.md

diff --git a/README.md b/README.md
index 7976076..7760260 100644
--- a/README.md
+++ b/README.md
@@ -186,16 +186,19 @@ docker-compose up
 2. 使用docker-compose运行。
 请阅读docker-compose.yml后，按照其中的提示操作即可
 
-3. 如何使用反代URL/微软云AzureAPI。
+3. 如何使用反代URL
 按照`config.py`中的说明配置API_URL_REDIRECT即可。
 
-4. 远程云服务器部署（需要云服务器知识与经验）。
+4. 微软云AzureAPI
+按照`config.py`中的说明配置即可（AZURE_ENDPOINT等四个配置）
+
+5. 远程云服务器部署（需要云服务器知识与经验）。
 请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
 
-5. 使用WSL2（Windows Subsystem for Linux 子系统）。
+6. 使用WSL2（Windows Subsystem for Linux 子系统）。
 请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
 
-6. 如何在二级网址（如`http://localhost/subpath`）下运行。
+7. 如何在二级网址（如`http://localhost/subpath`）下运行。
 请访问[FastAPI运行说明](docs/WithFastapi.md)
 
 ---
diff --git a/config.py b/config.py
index cb26cbb..b173862 100644
--- a/config.py
+++ b/config.py
@@ -1,12 +1,6 @@
 # [step 1]>> 例如： API_KEY = "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" （此key无效）
 API_KEY = "sk-此处填API密钥"    # 可同时填写多个API-KEY，用英文逗号分割，例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2"
 
-#增加关于AZURE的配置信息， 可以在AZURE网页中找到
-AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
-AZURE_API_KEY = "填入azure openai api的密钥"
-AZURE_API_VERSION = "填入api版本"
-AZURE_ENGINE = "填入ENGINE"
-
 
 # [step 2]>> 改为True应用代理，如果直接在海外服务器部署，此处不修改
 USE_PROXY = False
@@ -88,3 +82,10 @@ your bing cookies here
 # 如果需要使用Slack Claude，使用教程详情见 request_llm/README.md
 SLACK_CLAUDE_BOT_ID = ''   
 SLACK_CLAUDE_USER_TOKEN = ''
+
+
+# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
+AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "填入api版本"
+AZURE_ENGINE = "填入ENGINE"
diff --git a/docs/use_azure.md b/docs/use_azure.md
new file mode 100644
index 0000000..626b132
--- /dev/null
+++ b/docs/use_azure.md
@@ -0,0 +1,143 @@
+# 通过微软Azure云服务申请 Openai API
+
+由于Openai和微软的关系，现在是可以通过微软的Azure云计算服务直接访问openai的api，免去了注册和网络的问题。
+
+快速入门的官方文档的链接是：[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+# 申请API
+
+按文档中的“先决条件”的介绍，除了编程的环境以外，还需要以下三个条件：
+
+1.  Azure账号并创建订阅
+
+2.  为订阅添加Azure OpenAI 服务
+
+3.  部署模型
+
+## Azure账号并创建订阅
+
+### Azure账号
+
+创建Azure的账号时最好是有微软的账号，这样似乎更容易获得免费额度（第一个月的200美元，实测了一下，如果用一个刚注册的微软账号登录Azure的话，并没有这一个月的免费额度）。
+
+创建Azure账号的网址是：[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/)
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_944786_iH6AECuZ_tY0EaBd_1685327219?w=1327\&h=695\&type=image/png)
+
+打开网页后，点击 “免费开始使用” 会跳转到登录或注册页面，如果有微软的账户，直接登录即可，如果没有微软账户，那就需要到微软的网页再另行注册一个。
+
+注意，Azure的页面和政策时不时会变化，以实际最新显示的为准就好。
+
+### 创建订阅
+
+注册好Azure后便可进入主页：
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_444847_tk-9S-pxOYuaLs_K_1685327675?w=1865\&h=969\&type=image/png)
+
+首先需要在订阅里进行添加操作，点开后即可进入订阅的页面：
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_612820_z_1AlaEgnJR-rUl0_1685327892?w=1865\&h=969\&type=image/png)
+
+第一次进来应该是空的，点添加即可创建新的订阅（可以是“免费”或者“即付即用”的订阅），其中订阅ID是后面申请Azure OpenAI需要使用的。
+
+## 为订阅添加Azure OpenAI服务
+
+之后回到首页，点Azure OpenAI即可进入OpenAI服务的页面（如果不显示的话，则在首页上方的搜索栏里搜索“openai”即可）。
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_269759_nExkGcPC0EuAR5cp_1685328130?w=1865\&h=969\&type=image/png)
+
+不过现在这个服务还不能用。在使用前，还需要在这个网址申请一下：
+
+[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu)
+
+这里有二十来个问题，按照要求和自己的实际情况填写即可。
+
+其中需要注意的是
+
+1.  千万记得填对"订阅ID"
+
+2.  需要填一个公司邮箱（可以不是注册用的邮箱）和公司网址
+
+之后，再回到上面那个页面，点创建，就会进入创建页面了：
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_72708_9d9JYhylPVz3dFWL_1685328372?w=824\&h=590\&type=image/png)
+
+需要填入“资源组”和“名称”，按照自己的需要填入即可。
+
+完成后，在主页的“资源”里就可以看到刚才创建的“资源”了，点击进入后，就可以进行最后的部署了。
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_871541_CGCnbgtV9Uk1Jccy_1685329861?w=1217\&h=628\&type=image/png)
+
+## 部署模型
+
+进入资源页面后，在部署模型前，可以先点击“开发”，把密钥和终结点记下来。
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_852567_dxCZOrkMlWDSLH0d_1685330736?w=856\&h=568\&type=image/png)
+
+之后，就可以去部署模型了，点击“部署”即可，会跳转到 Azure OpenAI Studio 进行下面的操作：
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_169225_uWs1gMhpNbnwW4h2_1685329901?w=1865\&h=969\&type=image/png)
+
+进入 Azure OpenAI Studio 后，点击新建部署，会弹出如下对话框：
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_391255_iXUSZAzoud5qlxjJ_1685330224?w=656\&h=641\&type=image/png)
+
+在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。
+
+![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_724099_vBaHcUilsm1EtPgK_1685330396?w=1869\&h=482\&type=image/png)
+
+这个部署名需要记下来。
+
+到现在为止，申请操作就完成了，需要记下来的有下面几个东西：
+
+● 密钥（1或2都可以）
+
+● 终结点
+
+● 部署名（不是模型名）
+
+# API的使用
+
+接下来就是具体怎么使用API了，还是可以参考官方文档：[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+和openai自己的api调用有点类似，都需要安装openai库，不同的是调用方式
+
+```
+import openai
+openai.api_type = "azure" #固定格式，无需修改
+openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点”
+openai.api_version = "2023-05-15" #固定格式，无需修改
+openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2”
+
+response = openai.ChatCompletion.create(
+    engine="gpt-35-turbo", #这里填入的不是模型名，是部署名
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
+        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
+        {"role": "user", "content": "Do other Azure Cognitive Services support this too?"}
+    ]
+)
+
+print(response)
+print(response['choices'][0]['message']['content'])
+
+```
+
+需要注意的是：
+
+1.  engine那里填入的是部署名，不是模型名
+
+2.  通过openai库获得的这个 response 和通过 requests 库访问 url 获得的 response 不同，不需要 decode，已经是解析好的 json 了，直接根据键值读取即可。
+
+更细节的使用方法，详见官方API文档。
+
+# 关于费用
+
+Azure OpenAI API 还是需要一些费用的（免费订阅只有1个月有效期），费用如下：
+
+![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44)
+
+具体可以看这个网址：[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable)
+
+并非网上说的什么“一年白嫖”，但注册方法以及网络问题都比直接使用openai的api要简单一些。

From cd389499035e2e2684063da6c9b8c5b24002fdfb Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:53:57 +1000
Subject: [PATCH 53/78] =?UTF-8?q?=E5=BD=93=E9=81=87=E5=88=B0=E9=94=99?=
 =?UTF-8?q?=E8=AF=AF=E6=97=B6=EF=BC=8C=E5=9B=9E=E6=BB=9A=E5=88=B0=E5=8E=9F?=
 =?UTF-8?q?=E6=96=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a1e7758..48df10b 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -212,6 +212,8 @@ def fix_content(final_tex, node_string):
     final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
     final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
 
+    if "Traceback" in final_tex and "[Local Message]" in final_tex:
+        final_tex = node_string # 出问题了，还原原文
     if node_string.count('\\begin') != final_tex.count('\\begin'):
         final_tex = node_string # 出问题了，还原原文
     if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):

From 74941170aaec1838fbc0e99963588458addcc9b8 Mon Sep 17 00:00:00 2001
From: Ranhuiryan <qdgjhrh@outlook.com>
Date: Wed, 21 Jun 2023 16:19:26 +0800
Subject: [PATCH 54/78] update azure use instruction

---
 docs/use_azure.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/docs/use_azure.md b/docs/use_azure.md
index 626b132..f1c27ef 100644
--- a/docs/use_azure.md
+++ b/docs/use_azure.md
@@ -96,6 +96,15 @@
 
 ● 部署名（不是模型名）
 
+# 修改 config.py
+
+```
+AZURE_ENDPOINT = "填入终结点"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "2023-05-15"  # 默认使用 2023-05-15 版本，无需修改
+AZURE_ENGINE = "填入部署名"
+
+```
 # API的使用
 
 接下来就是具体怎么使用API了，还是可以参考官方文档：[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)

From 33d2e75aac8063f9d8fe615599fccf948b48424e Mon Sep 17 00:00:00 2001
From: Ranhuiryan <qdgjhrh@outlook.com>
Date: Wed, 21 Jun 2023 16:19:49 +0800
Subject: [PATCH 55/78] add azure-gpt35 to model list

---
 config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config.py b/config.py
index b173862..557b4e9 100644
--- a/config.py
+++ b/config.py
@@ -47,7 +47,7 @@ MAX_RETRY = 2
 
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
 # P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 # 本地LLM模型如ChatGLM的执行方式 CPU/GPU

From d841d13b047207fc15e277601ab1140e33988a9e Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:12:44 +0800
Subject: [PATCH 56/78] add arxiv translation test samples

---
 crazy_functions/crazy_functions_test.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index b4ff5e2..6e17fb3 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -188,8 +188,13 @@ def test_Latex():
     # txt = r"https://arxiv.org/abs/2305.17608"
     # txt = r"https://arxiv.org/abs/2211.16068"                     #  ACE
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
-    txt = r"https://arxiv.org/abs/2002.09253"
-    txt = r"https://arxiv.org/abs/2306.07831"
+    # txt = r"https://arxiv.org/abs/2002.09253"
+    # txt = r"https://arxiv.org/abs/2306.07831"
+    # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
+    # txt = r"https://arxiv.org/abs/2212.10156"
+    txt = r"https://arxiv.org/abs/2211.11559"
+    
+
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
         cli_printer.print(cb)   #  print(cb)
 

From b8560b75101437f7ab13e478c63d6a412d815790 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:46:16 +0800
Subject: [PATCH 57/78] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=AF=AF=E5=88=A4latex?=
 =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E6=96=87=E4=BB=B6=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/crazy_functions_test.py |  1 -
 crazy_functions/latex_utils.py          | 28 ++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 6e17fb3..7edd04f 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,7 +190,6 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
     # txt = r"https://arxiv.org/abs/2212.10156"
     txt = r"https://arxiv.org/abs/2211.11559"
     
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 48df10b..def4be2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -110,19 +110,41 @@ Latex Merge File
 def 寻找Latex主文件(file_manifest, mode):
     """
     在多Tex文档中，寻找主文件，必须包含documentclass，返回找到的第一个。
-    P.S. 但愿没人把latex模板放在里面传进来
+    P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
     """
+    canidates = []
     for texf in file_manifest:
         if os.path.basename(texf).startswith('merge'):
             continue
         with open(texf, 'r', encoding='utf8') as f:
             file_content = f.read()
         if r'\documentclass' in file_content:
-            return texf
+            canidates.append(texf)
         else:
             continue
-    raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
 
+    if len(canidates) == 0:
+        raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
+    elif len(canidates) == 1:
+        return canidates[0]
+    else: # if len(canidates) >= 2 通过一些Latex模板中常见（但通常不会出现在正文）的单词，对不同latex源文件扣分，取评分最高者返回
+        canidates_score = []
+        # 给出一些判定模板文档的词作为扣分项
+        unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+        expected_words = ['\input', '\ref', '\cite']
+        for texf in canidates:
+            canidates_score.append(0)
+            with open(texf, 'r', encoding='utf8') as f:
+                file_content = f.read()
+            for uw in unexpected_words:
+                if uw in file_content:
+                    canidates_score[-1] -= 1
+            for uw in expected_words:
+                if uw in file_content:
+                    canidates_score[-1] += 1
+        select = np.argmax(canidates_score) # 取评分最高者返回
+        return canidates[select]
+    
 def rm_comments(main_file):
     new_file_remove_comment_lines = []
     for l in main_file.splitlines():

From 9f0cf9fb2b3546e13a94f6cb9d6e0fa44eaffad9 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 23:30:31 +0800
Subject: [PATCH 58/78] =?UTF-8?q?arxiv=20PDF=20=E5=BC=95=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/crazy_functions_test.py | 3 ++-
 crazy_functions/latex_utils.py          | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 7edd04f..3ef555d 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -191,7 +191,8 @@ def test_Latex():
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
     # txt = r"https://arxiv.org/abs/2212.10156"
-    txt = r"https://arxiv.org/abs/2211.11559"
+    # txt = r"https://arxiv.org/abs/2211.11559"
+    txt = r"https://arxiv.org/abs/2303.08774"
     
 
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index def4be2..3e4f37c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -314,6 +314,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
     text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+    text, mask = split_worker(text, mask, r"\\includepdf\[(.*?)\]\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\item ")
     text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")

From 280e14d7b7794a2e94193d553e8bd271dd0fd3f7 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Mon, 26 Jun 2023 09:59:14 +0800
Subject: [PATCH 59/78] =?UTF-8?q?=E6=9B=B4=E6=96=B0Latex=E6=A8=A1=E5=9D=97?=
 =?UTF-8?q?=E7=9A=84docker-compose?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md          | 19 ++++++-------------
 docker-compose.yml | 27 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 7760260..b8b76c9 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ cd gpt_academic
 
 2. 配置API_KEY
 
-在`config.py`中，配置API KEY等设置，[特殊网络环境设置](https://github.com/binary-husky/gpt_academic/issues/1) 。
+在`config.py`中，配置API KEY等设置，[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。
 
 (P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件，并用其中的配置覆盖`config.py`的同名配置。因此，如果您能理解我们的配置读取逻辑，我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件，并把`config.py`中的配置转移（复制）到`config_private.py`中。`config_private.py`不受git管控，可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项，环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`)
 
@@ -140,15 +140,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-
 python main.py
 ```
 
-5. 测试函数插件
-```
-- 测试函数插件模板函数（要求gpt回答历史上的今天发生了什么），您可以根据此函数为模板，实现更复杂的功能
-    点击 "[函数插件模板Demo] 历史上的今天"
-```
-
 ## 安装-方法2：使用Docker
 
-1. 仅ChatGPT（推荐大多数人选择）
+1. 仅ChatGPT（推荐大多数人选择，等价于docker-compose方案1）
 
 ``` sh
 git clone https://github.com/binary-husky/gpt_academic.git  # 下载项目
@@ -161,26 +155,25 @@ docker run --rm -it --net=host gpt-academic
 #（最后一步-选择2）在macOS/windows环境下，只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
 ```
-P.S. 如果需要依赖Latex的插件功能，请见Wiki
+P.S. 如果需要依赖Latex的插件功能，请见Wiki。另外，您也可以直接使用docker-compose获取Latex功能（修改docker-compose.yml，保留方案4并删除其他方案）。
 
 2. ChatGPT + ChatGLM + MOSS（需要熟悉Docker）
 
 ``` sh
-# 修改docker-compose.yml，删除方案1和方案3，保留方案2。修改docker-compose.yml中方案2的配置，参考其中注释即可
+# 修改docker-compose.yml，保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置，参考其中注释即可
 docker-compose up
 ```
 
 3. ChatGPT + LLAMA + 盘古 + RWKV（需要熟悉Docker）
 ``` sh
-# 修改docker-compose.yml，删除方案1和方案2，保留方案3。修改docker-compose.yml中方案3的配置，参考其中注释即可
+# 修改docker-compose.yml，保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置，参考其中注释即可
 docker-compose up
 ```
 
 
 ## 安装-方法3：其他部署姿势
 1. 一键运行脚本。
-完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本，
-不建议电脑上已有python的用户采用此方法（在此基础上安装插件的依赖很麻烦）。
+完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。
 脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。
 
 2. 使用docker-compose运行。
diff --git a/docker-compose.yml b/docker-compose.yml
index 07f1c9f..0a0dcda 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -103,3 +103,30 @@ services:
                       echo '[jittorllms] 正在从github拉取最新代码...' &&
                       git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force &&
                       python3 -u main.py"
+
+
+## ===================================================
+## 【方案四】 chatgpt + Latex
+## ===================================================
+version: '3'
+services:
+  gpt_academic_with_latex:
+    image: ghcr.io/binary-husky/gpt_academic_with_latex:master
+    environment:
+      # 请查阅 `config.py` 以查看所有的配置信息
+      API_KEY:                  '    sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx                              '
+      USE_PROXY:                '    True                                                                             '
+      proxies:                  '    { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", }   '
+      LLM_MODEL:                '    gpt-3.5-turbo                                                                    '
+      AVAIL_LLM_MODELS:         '    ["gpt-3.5-turbo", "gpt-4"]                                                       '
+      LOCAL_MODEL_DEVICE:       '    cuda                                                                             '
+      DEFAULT_WORKER_NUM:       '    10                                                                               '
+      WEB_PORT:                 '    12303                                                                            '
+
+    # 与宿主的网络融合
+    network_mode: "host"
+
+    # 不使用代理网络拉取最新代码
+    command: >
+      bash -c "python3 -u main.py"
+

From 4290821a504ec2996241c09b262653111c7208b8 Mon Sep 17 00:00:00 2001
From: Xminry <46775500+Xminry@users.noreply.github.com>
Date: Tue, 27 Jun 2023 01:57:31 +0800
Subject: [PATCH 60/78] =?UTF-8?q?Update=20=E7=90=86=E8=A7=A3PDF=E6=96=87?=
 =?UTF-8?q?=E6=A1=A3=E5=86=85=E5=AE=B9.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/理解PDF文档内容.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py
index 5050864..f1a89a7 100644
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
     # 递归地切割PDF文件，每一块（尽量是完整的一个section，比如introduction，experiment等，必要时再进行切割）
     # 的长度必须小于 2500 个 Token
     file_content, page_one = read_and_clean_pdf_text(file_name) # （尝试）按照章节切割PDF
-
+    file_content = file_content.encode('utf-8', 'ignore').decode()   # avoid reading non-utf8 chars
+    page_one = str(page_one).encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
+    
     TOKEN_LIMIT_PER_FRAGMENT = 2500
 
     from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf

From f654c1af317ab6fccb40b0097800690a786d8d5d Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Tue, 27 Jun 2023 18:59:56 +0800
Subject: [PATCH 61/78] merge regex expressions

---
 crazy_functions/crazy_functions_test.py |  6 +-
 crazy_functions/latex_utils.py          | 74 ++++++++++++-------------
 2 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 3ef555d..f2d3969 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,9 +190,11 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    # txt = r"https://arxiv.org/abs/2212.10156"
+    txt = r"https://arxiv.org/abs/2212.10156"
     # txt = r"https://arxiv.org/abs/2211.11559"
-    txt = r"https://arxiv.org/abs/2303.08774"
+    # txt = r"https://arxiv.org/abs/2303.08774"
+    # txt = r"https://arxiv.org/abs/2303.12712"
+    # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
     
 
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a7eb9f2..83c4401 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -21,6 +21,7 @@ def set_forbidden_text(text, mask, pattern, flags=0):
     you can mask out (mask = PRESERVE so that text become untouchable for GPT) 
     everything between "\begin{equation}" and "\end{equation}"
     """
+    if isinstance(pattern, list): pattern = '|'.join(pattern)
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
@@ -46,7 +47,7 @@ def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
         mask[begin:end] = PRESERVE
     return text, mask
 
-def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
+def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
     """
     Move area out of preserve area (make text editable for GPT)
     count the number of the braces so as to catch compelete text area. 
@@ -64,6 +65,9 @@ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
             p += 1
         end = p
         mask[begin:end] = TRANSFORM
+        if forbid_wrapper:
+            mask[res.regs[0][0]:begin] = PRESERVE
+            mask[end:res.regs[0][1]] = PRESERVE
     return text, mask
 
 def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
@@ -163,6 +167,7 @@ def rm_comments(main_file):
         else:
             new_file_remove_comment_lines.append(l)
     main_file = '\n'.join(new_file_remove_comment_lines)
+    # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file)  # 将 \include 命令转换为 \input 命令
     main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # 使用正则表达式查找半行注释, 并替换为空字符串
     return main_file
 
@@ -209,9 +214,11 @@ def merge_tex_files(project_foler, main_file, mode):
         main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
         main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
         # find paper abstract
-        pattern = re.compile(r'\\begin\{abstract\}.*\n')
-        match = pattern.search(main_file)
-        assert match is not None, "Cannot find paper abstract section!"
+        pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
+        pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
+        match_opt1 = pattern_opt1.search(main_file)
+        match_opt2 = pattern_opt2.search(main_file)
+        assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
     return main_file
 
 
@@ -293,48 +300,32 @@ def split_subprocess(txt, project_folder, return_dict, opts):
 
     # 吸收title与作者以上的部分
     text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
-    # 删除iffalse注释
+    # 吸收iffalse注释
     text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
     # 吸收在25行以内的begin-end组合
     text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
     # 吸收匿名公式
-    text, mask = set_forbidden_text(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\\[.*?\\\]", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$",  r"\\\[.*?\\\]" ], re.DOTALL)
     # 吸收其他杂项
-    text, mask = set_forbidden_text(text, mask, r"\\section\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\section\*\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\subsection\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\subsubsection\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\bibliography\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\bibliographystyle\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
+    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
     text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
     text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
     text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
     text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
-    text, mask = set_forbidden_text(text, mask, r"\\includepdf\[(.*?)\]\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\item ")
-    text, mask = set_forbidden_text(text, mask, r"\\label\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\begin\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\vspace\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\hspace\{(.*?)\}")
-    text, mask = set_forbidden_text(text, mask, r"\\end\{(.*?)\}")
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
+    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}"])
     text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
     # reverse 操作必须放在最后
-    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
+    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
     root = convert_to_linklist(text, mask)
 
     # 修复括号
@@ -408,7 +399,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
         prev_node = node
         node = node.next
         if node is None: break
-
+    # 输出html调试文件，用红色标注处保留区（PRESERVE），用黑色标注转换区（TRANSFORM）
     with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
         segment_parts_for_gpt = []
         nodes = []
@@ -461,9 +452,13 @@ class LatexPaperSplit():
             pattern = re.compile(r'\\begin\{abstract\}.*\n')
             match = pattern.search(result_string)
             if not match:
-                pattern = re.compile(r'\\abstract\{')
-                match = pattern.search(result_string)
-            position = match.end()
+                # match \abstract{xxxx}
+                pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
+                match = pattern_compile.search(result_string)
+                position = match.regs[1][0]
+            else:
+                # match \begin{abstract}xxxx\end{abstract}
+                position = match.end()
             result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
         return result_string
 
@@ -482,6 +477,7 @@ class LatexPaperSplit():
             args=(txt, project_folder, return_dict, opts))
         p.start()
         p.join()
+        p.close()
         self.nodes = return_dict['nodes']
         self.sp = return_dict['segment_parts_for_gpt']
         return self.sp

From e18bef2e9c2ab1176d2df5d1c9a288ea087e2555 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Tue, 27 Jun 2023 19:16:05 +0800
Subject: [PATCH 62/78] add `item` breaker

---
 crazy_functions/latex_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 83c4401..49f547c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -302,7 +302,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
     # 吸收iffalse注释
     text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
-    # 吸收在25行以内的begin-end组合
+    # 吸收在42行以内的begin-end组合
     text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
     # 吸收匿名公式
     text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$",  r"\\\[.*?\\\]" ], re.DOTALL)
@@ -321,7 +321,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
     text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
     text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
-    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}"])
+    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
     text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
     # reverse 操作必须放在最后
     text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)

From 99cf7205c3059caaae0fa46f1739d602a95e1bf5 Mon Sep 17 00:00:00 2001
From: Xminry <xmrsunmoon@foxmail.com>
Date: Wed, 28 Jun 2023 10:30:08 +0800
Subject: [PATCH 63/78] =?UTF-8?q?feat:=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=8Ccn.bing.com=E7=89=88=EF=BC=8C?=
 =?UTF-8?q?=E5=9B=BD=E5=86=85=E5=8F=AF=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functional.py                     |  12 +++
 crazy_functions/联网的ChatGPT_bing版.py | 102 ++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 crazy_functions/联网的ChatGPT_bing版.py

diff --git a/crazy_functional.py b/crazy_functional.py
index a724b97..aea97a6 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -235,6 +235,18 @@ def get_crazy_functions():
     except:
         print('Load function plugin failed')
 
+    try:
+        from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
+        function_plugins.update({
+            "连接网络回答问题_bing搜索（先输入问题，再点击按钮，搜索引擎为cn.bing.com,国内可用）": {
+                "Color": "stop",
+                "AsButton": False,  # 加入下拉菜单中
+                "Function": HotReload(连接bing搜索回答问题)
+            }
+        })
+    except:
+        print('Load function plugin failed')
+
     try:
         from crazy_functions.解析项目源代码 import 解析任意code项目
         function_plugins.update({
diff --git a/crazy_functions/联网的ChatGPT_bing版.py b/crazy_functions/联网的ChatGPT_bing版.py
new file mode 100644
index 0000000..93a84a0
--- /dev/null
+++ b/crazy_functions/联网的ChatGPT_bing版.py
@@ -0,0 +1,102 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
+import requests
+from bs4 import BeautifulSoup
+from request_llm.bridge_all import model_info
+
+
+def bing_search(query, proxies=None):
+    """Search cn.bing.com for `query`; return a list of {'title': ..., 'link': ...} dicts (each link is also printed)."""
+    url = "https://cn.bing.com/search"
+    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
+    # Let requests percent-encode the query via `params`; raw interpolation into the URL breaks on '#', '&' and '+'.
+    response = requests.get(url, params={'q': query}, headers=headers, proxies=proxies)
+    soup = BeautifulSoup(response.content, 'html.parser')
+    results = []
+    for g in soup.find_all('li', class_='b_algo'):
+        anchors = g.find_all('a')
+        heading = g.find('h2')
+        # Skip hits lacking an anchor or an <h2> title instead of raising on them.
+        if not anchors or heading is None: continue
+        link = anchors[0].get('href', '')   # a first anchor without href yields '' and is filtered out below
+        if not link.startswith('http'): continue
+        results.append({'title': heading.text, 'link': link})
+
+    for r in results:
+        print(r['link'])
+    return results
+
+
+def scrape_text(url, proxies) -> str:
+    """Scrape the text content of a webpage.
+
+    Args:
+        url (str): The URL to scrape text from
+        proxies: Proxy setting forwarded unchanged to requests.get
+
+    Returns:
+        str: The scraped text, or the message "无法连接到该网页" when fetching fails
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
+        'Content-Type': 'text/plain',
+    }
+    try:
+        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
+        if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
+    except Exception:  # still best-effort, but KeyboardInterrupt/SystemExit are no longer swallowed
+        return "无法连接到该网页"
+    soup = BeautifulSoup(response.text, "html.parser")
+    for tag in soup(["script", "style"]):  # remove script/style elements before extracting text
+        tag.extract()
+    text = soup.get_text()
+    lines = (line.strip() for line in text.splitlines())
+    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+    return "\n".join(chunk for chunk in chunks if chunk)  # one non-empty chunk per output line
+
+@CatchException
+def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Plugin: search cn.bing.com for `txt`, scrape the hit pages, then ask the LLM to answer from them.
+    txt             text typed by the user in the input box, e.g. a passage to translate, or a path containing files to process
+    llm_kwargs      GPT model parameters such as temperature and top_p; normally passed through unchanged
+    plugin_kwargs   plugin parameters; currently unused
+    chatbot         handle of the chat display box, used to show messages to the user
+    history         chat history (prior context)
+    system_prompt   silent system prompt for GPT
+    web_port        port the application is currently running on
+    """
+    history = []    # clear the history to avoid overflowing the input
+    chatbot.append((f"请结合互联网信息回答以下问题：{txt}",
+                    "[Local Message] 请注意，您正在调用一个[函数插件]的模板，该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者，它可以作为创建新功能函数的模板。您若希望分享新的功能模组，请不吝PR！"))
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI # the GPT request takes a while, so update the UI promptly first
+
+    # ------------- < Step 1: scrape the search engine results > -------------
+    from toolbox import get_conf
+    proxies, = get_conf('proxies')
+    urls = bing_search(txt, proxies)
+    history = []
+
+    # ------------- < Step 2: visit each page in turn > -------------
+    max_search_result = 8   # maximum number of web pages to include
+    for index, url in enumerate(urls[:max_search_result]):
+        res = scrape_text(url['link'], proxies)
+        history.extend([f"第{index}份搜索结果：", res])
+        chatbot.append([f"第{index}份搜索结果：", res[:500]+"......"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI # the GPT request takes a while, so update the UI promptly first
+
+    # ------------- < Step 3: ChatGPT synthesis > -------------
+    i_say = f"从以上搜索结果中抽取信息，然后回答问题：{txt}"
+    i_say, history = input_clipping(    # clip the input, trimming the longest entries first, to avoid exceeding the token limit
+        inputs=i_say,
+        history=history,
+        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
+    )
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=i_say, inputs_show_user=i_say,
+        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+        sys_prompt="请从给定的若干条搜索结果中抽取信息，对最相关的两个搜索结果进行总结，然后回答问题。"
+    )
+    chatbot[-1] = (i_say, gpt_say)
+    history.append(i_say);history.append(gpt_say)
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI # UI update
+

From eb4c07997ece2efe35fce63b8bb7c36b6179342a Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 11:30:42 +0800
Subject: [PATCH 64/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLatex=E7=9F=AB=E9=94=99?=
 =?UTF-8?q?=E5=92=8C=E6=9C=AC=E5=9C=B0Latex=E8=AE=BA=E6=96=87=E7=BF=BB?=
 =?UTF-8?q?=E8=AF=91=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functional.py                     | 30 ++++++++++++-------------
 crazy_functions/Latex输出PDF结果.py     | 24 +++++++++++++++-----
 crazy_functions/crazy_functions_test.py |  4 ++--
 crazy_functions/latex_utils.py          | 10 +++++----
 4 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index a724b97..7f8c41e 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -348,17 +348,28 @@ def get_crazy_functions():
     try:
         from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
         function_plugins.update({
-            "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
+            "Latex英文纠错+高亮修正位置 [需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
-                # "AdvancedArgs": True,
-                # "ArgsReminder": "",
+                "AdvancedArgs": True,
+                "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令（使用英文）。",
                 "Function": HotReload(Latex英文纠错加PDF对比)
             }
         })
         from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
         function_plugins.update({
-            "Arixv翻译（输入arxivID） [需Latex]": {
+            "Arixv翻译（输入arxivID）[需Latex]": {
+                "Color": "stop",
+                "AsButton": False,
+                "AdvancedArgs": True,
+                "ArgsReminder": 
+                    "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
+                    "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+                "Function": HotReload(Latex翻译中文并重新编译PDF)
+            }
+        })
+        function_plugins.update({
+            "本地论文翻译（上传Latex压缩包）[需Latex]": {
                 "Color": "stop",
                 "AsButton": False,
                 "AdvancedArgs": True,
@@ -368,17 +379,6 @@ def get_crazy_functions():
                 "Function": HotReload(Latex翻译中文并重新编译PDF)
             }
         })
-        # function_plugins.update({
-        #     "本地论文翻译（上传Latex压缩包） [需Latex]": {
-        #         "Color": "stop",
-        #         "AsButton": False,
-        #         "AdvancedArgs": True,
-        #         "ArgsReminder": 
-        #             "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ 
-        #             "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
-        #         "Function": HotReload(Latex翻译中文并重新编译PDF)
-        #     }
-        # })
     except:
         print('Load function plugin failed')
 
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 1886375..810d802 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement):
     - sys_prompt_array: A list of strings containing prompts for system prompts.
     """
     n_split = len(pfg.sp_file_contents)
-    if mode == 'proofread':
+    if mode == 'proofread_en':
         inputs_array = [r"Below is a section from an academic paper, proofread this section." + 
-                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + 
+                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
                         r"Answer me only with the revised text:" + 
                         f"\n\n{frag}" for frag in pfg.sp_file_contents]
         sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@@ -70,6 +70,12 @@ def move_project(project_folder, arxiv_id=None):
         shutil.rmtree(new_workfolder)
     except:
         pass
+
+    # align subfolder if there is a folder wrapper
+    items = glob.glob(pj(project_folder,'*'))
+    if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
+        if os.path.isdir(items[0]): project_folder = items[0]
+
     shutil.copytree(src=project_folder, dst=new_workfolder)
     return new_workfolder
 
@@ -141,7 +147,11 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     chatbot.append([ "函数插件功能？",
         "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。仅在Windows系统进行了测试，其他操作系统表现未知。"])
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
+    
+    # <-------------- more requirements ------------->
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    more_req = plugin_kwargs.get("advanced_arg", "")
+    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
 
     # <-------------- check deps ------------->
     try:
@@ -180,13 +190,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
 
 
     # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
-    if not os.path.exists(project_folder + '/merge_proofread.tex'):
+    if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
         yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
-                                chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+                                chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)
 
 
     # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', 
+    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en', 
                              work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
     
 
@@ -195,6 +205,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
     if success:
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@@ -278,6 +289,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
     if success:
         chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
     else:
         chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
         yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index f2d3969..0c623b8 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,10 +190,10 @@ def test_Latex():
     # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
     # txt = r"https://arxiv.org/abs/2002.09253"
     # txt = r"https://arxiv.org/abs/2306.07831"
-    txt = r"https://arxiv.org/abs/2212.10156"
+    # txt = r"https://arxiv.org/abs/2212.10156"
     # txt = r"https://arxiv.org/abs/2211.11559"
     # txt = r"https://arxiv.org/abs/2303.08774"
-    # txt = r"https://arxiv.org/abs/2303.12712"
+    txt = r"https://arxiv.org/abs/2303.12712"
     # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
     
 
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 49f547c..a38405c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -430,7 +430,7 @@ class LatexPaperSplit():
     """
     def __init__(self) -> None:
         self.nodes = None
-        self.msg = "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
+        self.msg = "*{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成，" + \
             "版权归原文作者所有。翻译内容可靠性无保障，请仔细鉴别并以原文为准。" + \
             "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
         # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）
@@ -741,13 +741,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
         results_ += f"对比PDF编译是否成功: {diff_pdf_success};" 
         yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
 
+        if diff_pdf_success:
+            result_pdf = pj(work_folder_modified, f'merge_diff.pdf')    # get pdf path
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
         if modified_pdf_success:
             yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
-            os.chdir(current_dir)
-            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
+            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
             if os.path.exists(pj(work_folder, '..', 'translation')):
                 shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)  # promote file to web UI
             return True # 成功啦
         else:
             if n_fix>=max_try: break

From 64f76e7401a099cffc2e177835bdb4d30891062d Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 11:32:19 +0800
Subject: [PATCH 65/78] 3.42

---
 version | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/version b/version
index ceb909a..6353b34 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
 {
-  "version": 3.41,
+  "version": 3.42,
   "show_feature": true,
-  "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+  "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
 }

From 1a0009301548d9ccbaaaa0ed33fdfb62c76465b8 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:15:52 +0800
Subject: [PATCH 66/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a38405c..8b41fc9 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -430,7 +430,7 @@ class LatexPaperSplit():
     """
     def __init__(self) -> None:
         self.nodes = None
-        self.msg = "*{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成，" + \
+        self.msg = "*{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
             "版权归原文作者所有。翻译内容可靠性无保障，请仔细鉴别并以原文为准。" + \
             "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
         # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）

From 49253c4dc6393b68e08a0657011aad4c36fd7957 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:29:49 +0800
Subject: [PATCH 67/78] [arxiv trans] add html comparison to zip file

---
 crazy_functions/latex_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 8b41fc9..69f05ff 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -532,11 +532,11 @@ class LatexPaperFileGroup():
                 f.write(res)
         return manifest
 
-def write_html(sp_file_contents, sp_file_result, chatbot):
+def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
 
     # write html
     try:
-        import copy
+        import shutil
         from .crazy_utils import construct_html
         from toolbox import gen_time_str
         ch = construct_html() 
@@ -554,6 +554,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot):
                 ch.add_row(a=orig, b=trans)
         create_report_file_name = f"{gen_time_str()}.trans.html"
         ch.save_file(create_report_file_name)
+        shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
         promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
     except:
         from toolbox import trimmed_format_exc
@@ -634,7 +635,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
         pfg.get_token_num = None
         objdump(pfg, file=pj(project_folder,'temp.pkl'))
 
-    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
+    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
 
     #  <-------- 写出文件 ----------> 
     msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"

From aced272d3c3d4c3b3fd250b6c97c574cd95b30f8 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:43:50 +0800
Subject: [PATCH 68/78] =?UTF-8?q?=E5=BE=AE=E8=B0=83=E6=8F=92=E4=BB=B6?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functional.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/crazy_functional.py b/crazy_functional.py
index ec3235e..03aaaf5 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -226,19 +226,15 @@ def get_crazy_functions():
     try:
         from crazy_functions.联网的ChatGPT import 连接网络回答问题
         function_plugins.update({
-            "连接网络回答问题（先输入问题，再点击按钮，需要访问谷歌）": {
+            "连接网络回答问题（输入问题后点击该插件，需要访问谷歌）": {
                 "Color": "stop",
                 "AsButton": False,  # 加入下拉菜单中
                 "Function": HotReload(连接网络回答问题)
             }
         })
-    except:
-        print('Load function plugin failed')
-
-    try:
         from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
         function_plugins.update({
-            "连接网络回答问题_bing搜索（先输入问题，再点击按钮，搜索引擎为cn.bing.com,国内可用）": {
+            "连接网络回答问题（中文Bing版，输入问题后点击该插件）": {
                 "Color": "stop",
                 "AsButton": False,  # 加入下拉菜单中
                 "Function": HotReload(连接bing搜索回答问题)

From 3b78e0538b8890d7eefa8858948117be8d4da3e1 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Thu, 29 Jun 2023 14:52:58 +0800
Subject: [PATCH 69/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=92=E4=BB=B6demo?=
 =?UTF-8?q?=E7=9A=84=E5=9B=BE=E5=83=8F=E6=98=BE=E7=A4=BA=E7=9A=84=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/高级功能函数模板.py | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py
index 7c6a7ff..73ae45f 100644
--- a/crazy_functions/高级功能函数模板.py
+++ b/crazy_functions/高级功能函数模板.py
@@ -1,6 +1,7 @@
 from toolbox import CatchException, update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-import datetime
+import datetime, re
+
 @CatchException
 def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
     """
@@ -18,12 +19,34 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
     for i in range(5):
         currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
         currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
-        i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日？列举两条并发送相关图片。发送图片时，请使用Markdown，将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
+        i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日？用中文列举两条，然后分别给出描述事件的两个英文单词。' + '当你给出关键词时，使用以下json格式：{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。'
         gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
             inputs=i_say, inputs_show_user=i_say, 
             llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], 
-            sys_prompt="当你想发送一张照片时，请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
+            sys_prompt='输出格式示例：1908年，美国消防救援事业发展的“美国消防协会”成立。关键词：{"KeyWords":["Fire","American"]}。'
         )
+        gpt_say = get_images(gpt_say)
         chatbot[-1] = (i_say, gpt_say)
         history.append(i_say);history.append(gpt_say)
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+
+
+def get_images(gpt_say):
+    """Return gpt_say with a markdown image appended for the first keyword of each {"KeyWords":[...]} group."""
+    def get_image_by_keyword(keyword):
+        import requests
+        from bs4 import BeautifulSoup
+        # Pass the keyword via params so that it is URL-encoded.
+        response = requests.get('https://wallhaven.cc/search', params={'q': keyword}, timeout=2)
+        for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
+            # `x in tag` tests the tag's children, not its attributes, so use has_attr().
+            if image_element.has_attr("data-src"): return image_element["data-src"]
+        raise LookupError(f'no image found for {keyword}')
+
+    for keywords in re.findall(r'{"KeyWords":\[(.*?)\]}', gpt_say):
+        keywords = [n.strip().strip('"') for n in keywords.split(',')]
+        try:
+            url = get_image_by_keyword(keywords[0])
+            gpt_say += f"\n\n![{keywords[0]}]({url})"
+        except Exception: continue   # best effort: a failed lookup only means no image for this group
+    return gpt_say
\ No newline at end of file

From 37172906ef5a697d2ef3ee272147a27dd67ae138 Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Thu, 29 Jun 2023 14:55:55 +0800
Subject: [PATCH 70/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E5=AF=BC=E5=87=BA=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 toolbox.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/toolbox.py b/toolbox.py
index fb6aa9f..256d99c 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
     else:
         report_files = find_recent_files('gpt_log')
     if len(report_files) == 0:
-        return None, chatbot
+        return cookies, None, chatbot
     # files.extend(report_files)
     file_links = ''
     for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'

From 22f377e2fb6bf45c2a0447c0680ee0a1eba8f6d7 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 30 Jun 2023 11:05:47 +0800
Subject: [PATCH 71/78] fix multi user cwd shift

---
 crazy_functions/crazy_functions_test.py |  3 +-
 crazy_functions/latex_utils.py          | 51 ++++++++++++++++---------
 2 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 0c623b8..60b6b87 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -193,8 +193,9 @@ def test_Latex():
     # txt = r"https://arxiv.org/abs/2212.10156"
     # txt = r"https://arxiv.org/abs/2211.11559"
     # txt = r"https://arxiv.org/abs/2303.08774"
-    txt = r"https://arxiv.org/abs/2303.12712"
+    # txt = r"https://arxiv.org/abs/2303.12712"
     # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
+    txt = r"C:\Users\fuqingxu\Desktop\9"
     
 
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 69f05ff..eb65a8a 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
+def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
+    """
+    Mark every match of pattern as TRANSFORM, i.e. move it out of the preserve
+    area (make the text editable for GPT). A list of patterns is joined with '|'.
+    With forbid_wrapper=True only capture group 1 becomes TRANSFORM and the rest
+    of the match stays PRESERVE, e.g. \\begin{abstract} blabla \\end{abstract}.
+    """
+    if isinstance(pattern, list): pattern = '|'.join(pattern)
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        if not forbid_wrapper:
+            mask[res.span()[0]:res.span()[1]] = TRANSFORM
+        else:
+            mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE   # match start up to group 1, e.g. '\\begin{abstract}'
+            mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM   # group 1, e.g. the abstract body
+            mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE   # group 1 end up to match end, e.g. '\\end{abstract}'
+    return text, mask
+
 def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
     """
     Add a preserve text area in this paper (text become untouchable for GPT).
@@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     # reverse 操作必须放在最后
     text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
     text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
+    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
     root = convert_to_linklist(text, mask)
 
     # 修复括号
@@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
         print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
         return False, -1, [-1]
     
-
-def compile_latex_with_timeout(command, timeout=60):
+def compile_latex_with_timeout(command, cwd=None, timeout=60):   # cwd=None runs in the current working directory, so callers that pass only a command keep working
     import subprocess
-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
     try:
         stdout, stderr = process.communicate(timeout=timeout)
     except subprocess.TimeoutExpired:
@@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
 
         # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
-        os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
+        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
 
         yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history)   # 刷新Gradio前端界面
-        os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+        ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
         
         if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
             # 只有第二步成功，才能继续下面的步骤
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history)    # 刷新Gradio前端界面
             if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
-                os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex  {main_file_original}.aux'); os.chdir(current_dir)
+                ok = compile_latex_with_timeout(f'bibtex  {main_file_original}.aux', work_folder_original)
             if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
-                os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux'); os.chdir(current_dir)
+                ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux', work_folder_modified)
 
             yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history)  # 刷新Gradio前端界面
-            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
-            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
-            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
-            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+            ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
 
             if mode!='translate_zh':
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
                 ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
 
                 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
-                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-                os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
-                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux', work_folder)
+                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+                ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
 
-        # <--------------------->
-        os.chdir(current_dir)
 
         # <---------- 检查结果 ----------->
         results_ = ""
@@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
             yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
             if not can_retry: break
 
-    os.chdir(current_dir)
     return False # 失败啦
 
 

From 403667aec18cba2d9fb719afa946168f3907124f Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Fri, 30 Jun 2023 12:06:28 +0800
Subject: [PATCH 72/78] upgrade chatglm to chatglm2

---
 request_llm/bridge_all.py     | 11 ++++++++++-
 request_llm/bridge_chatglm.py |  6 +++---
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index 02cfe98..d33f161 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -152,7 +152,7 @@ model_info = {
         "token_cnt": get_token_num_gpt4,
     },
 
-    # chatglm
+    # 将 chatglm 直接对齐到 chatglm2
     "chatglm": {
         "fn_with_ui": chatglm_ui,
         "fn_without_ui": chatglm_noui,
@@ -161,6 +161,15 @@ model_info = {
         "tokenizer": tokenizer_gpt35,
         "token_cnt": get_token_num_gpt35,
     },
+    "chatglm2": {
+        "fn_with_ui": chatglm_ui,
+        "fn_without_ui": chatglm_noui,
+        "endpoint": None,
+        "max_token": 1024,
+        "tokenizer": tokenizer_gpt35,
+        "token_cnt": get_token_num_gpt35,
+    },
+    
     # newbing
     "newbing": {
         "fn_with_ui": newbing_ui,
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
index 100783d..deaacd2 100644
--- a/request_llm/bridge_chatglm.py
+++ b/request_llm/bridge_chatglm.py
@@ -40,12 +40,12 @@ class GetGLMHandle(Process):
         while True:
             try:
                 if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
                     device, = get_conf('LOCAL_MODEL_DEVICE')
                     if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
+                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
                     else:
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
                     self.chatglm_model = self.chatglm_model.eval()
                     break
                 else:

From ecb08e69be5b39a206b742c365379286260ecabe Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 30 Jun 2023 13:08:54 +0800
Subject: [PATCH 73/78] remove find picture core functionality

---
 core_functional.py                        |  2 +-
 crazy_functions/批量翻译PDF文档_多线程.py | 19 +++++--------------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/core_functional.py b/core_functional.py
index e126b57..7bc3582 100644
--- a/core_functional.py
+++ b/core_functional.py
@@ -63,6 +63,7 @@ def get_core_functions():
             "Prefix":   r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL，" +
                         r"然后请使用Markdown格式封装，并且不要有反斜线，不要用代码块。现在，请按以下描述给我发送图片：" + "\n\n",
             "Suffix":   r"",
+            "Visible": False,
         },
         "解释代码": {
             "Prefix":   r"请解释以下代码：" + "\n```\n",
@@ -73,6 +74,5 @@ def get_core_functions():
                         r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
                         r"Items need to be transformed:",
             "Suffix":   r"",
-            "Visible": False,
         }
     }
diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py
index 06d8a5a..0adac96 100644
--- a/crazy_functions/批量翻译PDF文档_多线程.py
+++ b/crazy_functions/批量翻译PDF文档_多线程.py
@@ -1,5 +1,5 @@
 from toolbox import CatchException, report_execption, write_results_to_file
-from toolbox import update_ui
+from toolbox import update_ui, promote_file_to_downloadzone
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 from .crazy_utils import read_and_clean_pdf_text
@@ -147,23 +147,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
             print('writing html result failed:', trimmed_format_exc())
 
     # 准备文件的下载
-    import shutil
     for pdf_path in generated_conclusion_files:
         # 重命名文件
-        rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}'
-        if os.path.exists(rename_file):
-            os.remove(rename_file)
-        shutil.copyfile(pdf_path, rename_file)
-        if os.path.exists(pdf_path):
-            os.remove(pdf_path)
+        rename_file = f'翻译-{os.path.basename(pdf_path)}'
+        promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot)
     for html_path in generated_html_files:
         # 重命名文件
-        rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}'
-        if os.path.exists(rename_file):
-            os.remove(rename_file)
-        shutil.copyfile(html_path, rename_file)
-        if os.path.exists(html_path):
-            os.remove(html_path)
+        rename_file = f'翻译-{os.path.basename(html_path)}'
+        promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot)
     chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 

From df3f1aa3cac73b5906e36b446ee4cb6edd91af6a Mon Sep 17 00:00:00 2001
From: binary-husky <qingxu.fu@outlook.com>
Date: Fri, 30 Jun 2023 14:56:22 +0800
Subject: [PATCH 74/78] =?UTF-8?q?=E6=9B=B4=E6=AD=A3ChatGLM2=E7=9A=84?=
 =?UTF-8?q?=E9=BB=98=E8=AE=A4Token=E6=95=B0=E9=87=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main.py b/main.py
index 65e1f4c..2144010 100644
--- a/main.py
+++ b/main.py
@@ -104,7 +104,7 @@ def main():
                     system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
                     top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
                     temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
-                    max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
+                    max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
                     checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
                     md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
 

From a1092d8f924420d243e36dca22ccfd3037df821a Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sat, 1 Jul 2023 00:17:26 +0800
Subject: [PATCH 75/78] =?UTF-8?q?=E6=8F=90=E4=BE=9B=E8=87=AA=E5=8A=A8?=
 =?UTF-8?q?=E6=B8=85=E7=A9=BA=E8=BE=93=E5=85=A5=E6=A1=86=E7=9A=84=E9=80=89?=
 =?UTF-8?q?=E9=A1=B9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 config.py | 3 +++
 main.py   | 9 +++++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/config.py b/config.py
index 58e0e09..f187a0c 100644
--- a/config.py
+++ b/config.py
@@ -56,6 +56,9 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
 # 设置gradio的并行线程数（不需要修改）
 CONCURRENT_COUNT = 100
 
+# 是否在提交时自动清空输入框
+AUTO_CLEAR_TXT = False
+
 # 加一个live2d装饰
 ADD_WAIFU = False
 
diff --git a/main.py b/main.py
index 65e1f4c..f1b7f45 100644
--- a/main.py
+++ b/main.py
@@ -6,8 +6,8 @@ def main():
     from request_llm.bridge_all import predict
     from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
     # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
-    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \
-        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS')
+    proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
+        get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
 
     # 如果WEB_PORT是-1, 则随机选取WEB端口
     PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
@@ -144,6 +144,11 @@ def main():
         resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
         clearBtn.click(lambda: ("",""), None, [txt, txt2])
         clearBtn2.click(lambda: ("",""), None, [txt, txt2])
+        if AUTO_CLEAR_TXT:
+            submitBtn.click(lambda: ("",""), None, [txt, txt2])
+            submitBtn2.click(lambda: ("",""), None, [txt, txt2])
+            txt.submit(lambda: ("",""), None, [txt, txt2])
+            txt2.submit(lambda: ("",""), None, [txt, txt2])
         # 基础功能区的回调函数注册
         for k in functional:
             if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue

From d7ac99f603bef9eeae26b22df4891d562cd829d4 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sat, 1 Jul 2023 01:46:43 +0800
Subject: [PATCH 76/78] =?UTF-8?q?=E6=9B=B4=E6=AD=A3=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/crazy_functions_test.py | 2 +-
 crazy_functions/latex_utils.py          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 60b6b87..a10f3c2 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -195,7 +195,7 @@ def test_Latex():
     # txt = r"https://arxiv.org/abs/2303.08774"
     # txt = r"https://arxiv.org/abs/2303.12712"
     # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
-    txt = r"C:\Users\fuqingxu\Desktop\9"
+    txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误！
     
 
     for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index eb65a8a..be5a367 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -203,6 +203,7 @@ def merge_tex_files_(project_foler, main_file, mode):
                 c = fx.read()
         else:  
             # e.g., \input{srcs/07_appendix}
+            assert os.path.exists(fp+'.tex'), f'即找不到{fp}，也找不到{fp}.tex，Tex源文件缺失！'
             with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
                 c = fx.read()
         c = merge_tex_files_(project_foler, c, mode)

From 41c10f5688fe5e9993ec16723f1191b07798b9c0 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Sat, 1 Jul 2023 02:28:32 +0800
Subject: [PATCH 77/78] report image generation error in UI

---
 crazy_functions/图片生成.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py
index 5bf8bc4..1bf53f4 100644
--- a/crazy_functions/图片生成.py
+++ b/crazy_functions/图片生成.py
@@ -27,8 +27,10 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
     }
     response = requests.post(url, headers=headers, json=data, proxies=proxies)
     print(response.content)
-    image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
-
+    try:
+        image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
+    except:
+        raise RuntimeError(response.content.decode())
     # 文件保存到本地
     r = requests.get(image_url, proxies=proxies)
     file_path = 'gpt_log/image_gen/'

From 5f7ffef2385786f91a741dc41de223492fe7a66a Mon Sep 17 00:00:00 2001
From: w_xiaolizu <w_xiaolizu@kingsoft.com>
Date: Fri, 21 Apr 2023 17:09:49 +0800
Subject: [PATCH 78/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9F=BA=E7=A1=80?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=88=A4=E7=A9=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/辅助回答.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 crazy_functions/辅助回答.py

diff --git a/crazy_functions/辅助回答.py b/crazy_functions/辅助回答.py
new file mode 100644
index 0000000..b635f88
--- /dev/null
+++ b/crazy_functions/辅助回答.py
@@ -0,0 +1,28 @@
+# encoding: utf-8
+# @Time   : 2023/4/19
+# @Author : Spike
+# @Descr   :
+from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+
+
+@CatchException
+def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Answer txt, or revisit the last history entry when txt is empty, then list three likely follow-up questions."""
+    if txt:
+        show_say = txt
+        prompt = txt+'\n回答完问题后，再列出用户可能提出的三个问题。'
+    elif history:
+        prompt = history[-1]+"\n分析上述回答，再列出用户可能提出的三个问题。"
+        show_say = '分析上述回答，再列出用户可能提出的三个问题。'
+    else:  # neither input nor history: report it instead of failing on history[-1]
+        chatbot.append(('分析上述回答，再列出用户可能提出的三个问题。', '输入和历史记录均为空，请先输入问题。'))
+        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
+        return
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=prompt, inputs_show_user=show_say, llm_kwargs=llm_kwargs,
+        chatbot=chatbot, history=history, sys_prompt=system_prompt)
+    chatbot[-1] = (show_say, gpt_say)
+    history.extend([show_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
\ No newline at end of file