合并master

2023-06-04 20:26:04 +08:00
parent 12b9c94c60 27f97ba92a
commit 8ab0c1b6e5
14 changed files with 1325 additions and 103 deletions
--- a/crazy_functions/Langchain知识库.py
+++ b/crazy_functions/Langchain知识库.py
@ -0,0 +1,98 @@
+from toolbox import CatchException, update_ui, ProxyNetworkActivate
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
+
+
+
+@CatchException
+def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Build (or extend) a knowledge archive from a batch of local files.
+
+    txt             Text from the input box; it is handed to get_files_from_everything to locate files
+    llm_kwargs      GPT model parameters (not used by this function)
+    plugin_kwargs   Plugin parameters; a non-empty "advanced_arg" selects the archive id, otherwise 'default'
+    chatbot         Handle of the chat display box, used to show messages to the user
+    history         Chat history; it is cleared at the start to avoid input overflow
+    system_prompt   Silent system prompt for GPT (not used by this function)
+    web_port        Port the application is running on (not used by this function)
+
+    This is a generator: it yields the UI updates produced by update_ui.
+    """
+    history = []    # clear the history to avoid input overflow
+    chatbot.append(("这是什么功能？", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+    # resolve deps
+    try:
+        from zh_langchain import construct_vector_store
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        from .crazy_utils import knowledge_archive_interface
+    except Exception:
+        chatbot.append(
+            ["依赖不足", 
+             "导入依赖失败。正在尝试自动安装，请查看终端的输出或耐心等待..."]
+        )
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        from .crazy_utils import try_install_deps
+        try_install_deps(['zh_langchain==0.2.0'])
+        # The failed import above left the names below unbound, so the imports
+        # must be retried after installation instead of falling through.
+        import importlib
+        importlib.invalidate_caches()   # let the import system see freshly installed packages
+        try:
+            from zh_langchain import construct_vector_store
+            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            from .crazy_utils import knowledge_archive_interface
+        except Exception:
+            chatbot.append(["依赖不足", "自动安装依赖后仍然无法导入, 请手动安装依赖并重启程序后重试。"])
+            yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+            return
+
+    # < -------------------- read parameters --------------- >
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    kai_id = plugin_kwargs.get("advanced_arg", 'default')
+
+    # < -------------------- collect files --------------- >
+    file_manifest = []
+    spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
+    for sp in spl:
+        _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
+        file_manifest += file_manifest_tmp
+
+    if not file_manifest:
+        chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    # < ------------------- warm up the text embedding model --------------- >
+    chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    print('Checking Text2vec ...')
+    with ProxyNetworkActivate():    # temporarily activate the proxy network
+        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
+
+    # < ------------------- build the knowledge archive --------------- >
+    chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+    print('Establishing knowledge archive ...')
+    with ProxyNetworkActivate():    # temporarily activate the proxy network
+        kai = knowledge_archive_interface()
+        kai.feed_archive(file_manifest=file_manifest, id=kai_id)
+    kai_files = kai.get_loaded_file()
+    kai_files = '<br/>'.join(kai_files)
+    chatbot.append(['构建完成', f"当前知识库内的有效文件：\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+@CatchException
+def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
+    """
+    Answer the question in `txt` using a previously built knowledge archive.
+
+    The archive id comes from the 'langchain_plugin_embedding' cookie when it is
+    present; otherwise from a non-empty plugin_kwargs["advanced_arg"], falling
+    back to 'default'. The prompt returned by the archive lookup is shown to the
+    user and sent to the model; (prompt, answer) is then appended to `history`.
+
+    This is a generator: it yields UI updates.
+    """
+    from .crazy_utils import knowledge_archive_interface
+    kai = knowledge_archive_interface()
+
+    # < ------------------- pick the archive id --------------- >
+    cookies = chatbot._cookies
+    if 'langchain_plugin_embedding' in cookies:
+        archive_id = cookies['langchain_plugin_embedding']
+    else:
+        if plugin_kwargs.get("advanced_arg") == "":
+            plugin_kwargs.pop("advanced_arg")
+        archive_id = plugin_kwargs.get("advanced_arg", 'default')
+    resp, prompt = kai.answer_with_archive_by_id(txt, archive_id)
+
+    chatbot.append((txt, '[Local Message] ' + prompt))
+    # the GPT request takes a while, so refresh the UI once before issuing it
+    yield from update_ui(chatbot=chatbot, history=history)
+    reply = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=prompt,
+        inputs_show_user=txt,
+        llm_kwargs=llm_kwargs,
+        chatbot=chatbot,
+        history=[],
+        sys_prompt=system_prompt,
+    )
+    history.extend((prompt, reply))
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
--- a/crazy_functions/Latex全文润色.py
+++ b/crazy_functions/Latex全文润色.py
@ -238,3 +238,6 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')
+
+
+
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@ -0,0 +1,263 @@
+from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
+from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
+import glob, os, requests, time
+# Shorthand for os.path.join, used below to build cache and work-folder paths.
+pj = os.path.join
+# Root folder (under the user's home directory) for arXiv-related cache files.
+ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
+
+# =================================== 工具函数 ===============================================
+沙雕GPT啊别犯这些低级翻译错误  = 'You must to translate "agent" to "智能体". '
+def switch_prompt(pfg, mode):
+    """
+    Generate prompts and system prompts based on the mode for proofreading or translating.
+    Args:
+    - pfg: object exposing `sp_file_contents`, the list of text fragments to process.
+    - mode: 'proofread' or 'translate_zh'. 'proofread_latex' is accepted as an alias of
+      'proofread' because the proofreading plugin in this file passes that name
+      (presumably forwarded to this function by Latex精细分解与转化; confirm).
+
+    Returns:
+    - inputs_array: A list of strings, one user prompt per fragment.
+    - sys_prompt_array: A list of strings, one system prompt per fragment.
+
+    Raises:
+    - AssertionError: if `mode` is not recognised (raised explicitly so it survives `python -O`).
+    """
+    fragments = pfg.sp_file_contents
+    if mode in ('proofread', 'proofread_latex'):
+        # Each sentence ends with a space so the concatenated prompt stays readable.
+        instruction = (r"Below is a section from an academic paper, proofread this section. " +
+                       r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
+                       r"Answer me only with the revised text:")
+        sys_prompt = "You are a professional academic paper writer."
+    elif mode == 'translate_zh':
+        instruction = (r"Below is a section from an English academic paper, translate it into Chinese. " + 沙雕GPT啊别犯这些低级翻译错误 +
+                       r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
+                       r"Answer me only with the translated text:")
+        sys_prompt = "You are a professional translator."
+    else:
+        raise AssertionError("未知指令")
+    inputs_array = [instruction + f"\n\n{frag}" for frag in fragments]
+    sys_prompt_array = [sys_prompt for _ in fragments]
+    return inputs_array, sys_prompt_array
+
+def desend_to_extracted_folder_if_exist(project_folder):
+    """ 
+    Descend into the extracted folder if it exists, otherwise return the original folder.
+
+    Every direct sub-directory of `project_folder` is examined (in sorted order, so the
+    result is deterministic); the first one whose name ends with '.extract' wins.
+
+    Args:
+    - project_folder: A string specifying the folder path.
+
+    Returns:
+    - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
+    """
+    for entry in sorted(glob.glob(f'{project_folder}/*')):
+        if os.path.isdir(entry) and entry.endswith('.extract'):
+            return entry
+    return project_folder
+
+def move_project(project_folder, arxiv_id=None):
+    """ 
+    Create a new work folder and copy the project folder to it.
+
+    Args:
+    - project_folder: A string specifying the folder path of the project.
+    - arxiv_id: Optional string. When given, the work folder is
+      ARXIV_CACHE_DIR/<arxiv_id>/workfolder; otherwise it is gpt_log/<gen_time_str()>.
+
+    Returns:
+    - A string specifying the path to the new work folder.
+    """
+    import shutil, time
+    time.sleep(2)   # avoid time string conflict
+    if arxiv_id is not None:
+        new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
+    else:
+        new_workfolder = f'gpt_log/{gen_time_str()}'
+    # copytree needs a non-existing destination; a missing folder is not an error here.
+    shutil.rmtree(new_workfolder, ignore_errors=True)
+    shutil.copytree(src=project_folder, dst=new_workfolder)
+    return new_workfolder
+
+def arxiv_download(chatbot, history, txt):
+    """
+    Resolve `txt` to a local LaTeX project, downloading the arXiv source when `txt` is an arXiv URL.
+
+    This is a generator (it yields UI updates); call it with `yield from` to get the return value.
+
+    Args:
+    - chatbot: chat display handle; progress messages are appended to it.
+    - history: chat history forwarded to the UI refresh helpers.
+    - txt: user input, e.g. a local path or https://arxiv.org/abs/1707.06690.
+
+    Returns a tuple (path_or_message, arxiv_id):
+    - (txt, None) when `txt` does not start with 'https://arxiv.org';
+    - (error message, None) when the URL is not of the 'https://arxiv.org/abs/' form;
+    - (path of the cached translate_zh.pdf, arxiv_id) when a translated PDF is already cached;
+    - (extraction folder, arxiv_id) once the source archive is available and extracted.
+
+    Raises:
+    - requests.HTTPError: if the source download answers with an HTTP error status.
+    """
+    def check_cached_translation_pdf(arxiv_id):
+        # Return the cached translated PDF path (and expose it for download), or False.
+        translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
+        os.makedirs(translation_dir, exist_ok=True)
+        target_file = pj(translation_dir, 'translate_zh.pdf')
+        if os.path.exists(target_file):
+            promote_file_to_downloadzone(target_file)
+            return target_file
+        return False
+
+    if not txt.startswith('https://arxiv.org'):
+        return txt, None
+
+    # <-------------- inspect format ------------->
+    chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
+    yield from update_ui(chatbot=chatbot, history=history)
+    time.sleep(1) # give the UI time to refresh
+
+    url_ = txt   # https://arxiv.org/abs/1707.06690
+    if not txt.startswith('https://arxiv.org/abs/'):
+        msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
+        yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # refresh the UI
+        return msg, None
+    # <-------------- set format ------------->
+    arxiv_id = url_.split('/abs/')[-1]
+    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
+    if cached_translation_pdf: return cached_translation_pdf, arxiv_id
+
+    url_tar = url_.replace('/abs/', '/e-print/')
+    translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
+    extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
+    os.makedirs(translation_dir, exist_ok=True)
+
+    # <-------------- download arxiv source file ------------->
+    dst = pj(translation_dir, arxiv_id+'.tar')
+    if os.path.exists(dst):
+        yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history)  # refresh the UI
+    else:
+        yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history)  # refresh the UI
+        proxies, = get_conf('proxies')
+        r = requests.get(url_tar, proxies=proxies)
+        # Fail before writing: otherwise an error page would be stored as the
+        # .tar file and picked up as a valid cache entry on every later run.
+        r.raise_for_status()
+        with open(dst, 'wb+') as f:
+            f.write(r.content)
+    # <-------------- extract file ------------->
+    yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history)  # refresh the UI
+    from toolbox import extract_archive
+    extract_archive(file_path=dst, dest_dir=extract_dst)
+    return extract_dst, arxiv_id
+# ========================================= 插件主程序1 =====================================================    
+
+
+@CatchException
+def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry point: proofread a local LaTeX project and compile the result to PDF.
+
+    `txt` must be an existing local path; all .tex files below it are collected
+    recursively. The project is copied to a fresh work folder, processed with
+    Latex精细分解与转化 (skipped when merge_proofread.tex already exists there),
+    compiled with 编译Latex差别 and finally zipped.
+
+    This is a generator yielding UI updates; its return value is the `success`
+    flag obtained from 编译Latex差别 (or None on an early exit).
+    """
+    # <-------------- information about this plugin ------------->
+    chatbot.append([ "函数插件功能？",
+        "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。仅在Windows系统进行了测试，其他操作系统表现未知。"])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+
+    # <-------------- check deps ------------->
+    # NOTE(review): os.system returns the command's exit status instead of raising
+    # when the command is missing, so this block may not detect an absent pdflatex - confirm.
+    try:
+        import glob, os, time
+        os.system(f'pdflatex -version')
+        from .latex_utils import Latex精细分解与转化, 编译Latex差别
+    except Exception as e:
+        chatbot.append([ f"解析项目: {txt}",
+            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    
+
+    # <-------------- clear history and read input ------------->
+    history = []
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    
+
+    # <-------------- if is a zip/tar file ------------->
+    project_folder = desend_to_extracted_folder_if_exist(project_folder)
+
+
+    # <-------------- move latex project away from temp folder ------------->
+    project_folder = move_project(project_folder, arxiv_id=None)
+
+
+    # <-------------- if merge_proofread is already generated, skip gpt req ------------->
+    # NOTE(review): confirm that 'proofread_latex' is a mode name switch_prompt accepts.
+    if not os.path.exists(project_folder + '/merge_proofread.tex'):
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+
+
+    # <-------------- compile PDF ------------->
+    success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', 
+                             work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
+    
+
+    # <-------------- zip PDF ------------->
+    zip_result(project_folder)
+    if success:
+        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
+        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # refresh the UI
+    else:
+        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
+        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # refresh the UI
+
+    # <-------------- we are done ------------->
+    return success
+
+
+# ========================================= 插件主程序2 =====================================================    
+
+@CatchException
+def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Plugin entry point: translate a LaTeX project into Chinese and compile it to PDF.
+
+    `txt` is first passed through arxiv_download, so it may be an arXiv URL or a
+    local path. If that step hands back a path ending in '.pdf', a message is
+    shown and the function stops. Otherwise all .tex files below the project
+    folder are collected recursively, the project is copied to a work folder,
+    processed with Latex精细分解与转化 (skipped when merge_translate_zh.tex
+    already exists there), compiled with 编译Latex差别 and finally zipped.
+
+    This is a generator yielding UI updates; its return value is the `success`
+    flag obtained from 编译Latex差别 (or None on an early exit).
+    """
+    # <-------------- information about this plugin ------------->
+    chatbot.append([
+        "函数插件功能？",
+        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。仅在Windows系统进行了测试，其他操作系统表现未知。"])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+
+
+    # <-------------- check deps ------------->
+    # NOTE(review): os.system returns the command's exit status instead of raising
+    # when the command is missing, so this block may not detect an absent pdflatex - confirm.
+    try:
+        import glob, os, time
+        os.system(f'pdflatex -version')
+        from .latex_utils import Latex精细分解与转化, 编译Latex差别
+    except Exception as e:
+        chatbot.append([ f"解析项目: {txt}",
+            f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    
+
+    # <-------------- clear history and read input ------------->
+    history = []
+    txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
+    # A '.pdf' result means arxiv_download found an already translated PDF in its cache.
+    if txt.endswith('.pdf'):
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    
+
+    # <-------------- if is a zip/tar file ------------->
+    project_folder = desend_to_extracted_folder_if_exist(project_folder)
+
+
+    # <-------------- move latex project away from temp folder ------------->
+    project_folder = move_project(project_folder, arxiv_id)
+
+
+    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
+    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
+
+
+    # <-------------- compile PDF ------------->
+    success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', 
+                             work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
+
+    # <-------------- zip PDF ------------->
+    zip_result(project_folder)
+    if success:
+        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
+        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # refresh the UI
+    else:
+        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
+        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # refresh the UI
+
+    # <-------------- we are done ------------->
+    return success
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@ -3,6 +3,8 @@
    这个文件用于函数插件的单元测试
    运行方法 python crazy_functions/crazy_functions_test.py
 """
+    
+# ==============================================================================================================================

 def validate_path():
    import os, sys
@ -10,10 +12,16 @@ def validate_path():
    root_dir_assume = os.path.abspath(os.path.dirname(__file__) +  '/..')
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)
-    
 validate_path() # validate path so you can run from base directory
+
+# ==============================================================================================================================
+
 from colorful import *
 from toolbox import get_conf, ChatBotWithCookies
+import contextlib
+import os
+import sys
+from functools import wraps
 proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
    get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')

@ -30,7 +38,43 @@ history = []
 system_prompt = "Serve me as a writing and programming assistant."
 web_port = 1024

+# ==============================================================================================================================

+def silence_stdout(func):
+    """
+    Decorator for generator functions: discard everything `func` prints.
+
+    While `func` is running, sys.stdout points at os.devnull; it is switched back
+    to the real stdout each time a value is handed to the consumer, so the
+    consumer's own printing stays visible. A single devnull handle is reused and
+    closed at the end, and sys.stdout is restored even if `func` raises or the
+    generator is closed early.
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        original_stdout = sys.stdout
+        with open(os.devnull, 'w') as devnull:
+            sys.stdout = devnull
+            try:
+                for q in func(*args, **kwargs):
+                    sys.stdout = original_stdout
+                    yield q
+                    sys.stdout = devnull
+            finally:
+                sys.stdout = original_stdout
+    return wrapper
+
+class CLI_Printer():
+    """
+    Incremental console printer for a chat transcript.
+
+    It remembers the text rendered by the previous call; when the new rendering
+    merely extends the old one, only the new tail is printed.
+    """
+    def __init__(self) -> None:
+        self.pre_buf = ""
+
+    def print(self, buf):
+        """Render `buf`, a sequence of (question, answer) pairs, and print what is new."""
+        rendered = '\n'.join(
+            sprint亮靛('[Me]:' + question) + '\n' + '[GPT]:' + answer
+            for question, answer in buf
+        )
+
+        is_continuation = bool(self.pre_buf) and rendered.startswith(self.pre_buf)
+        if is_continuation:
+            # same conversation, longer text: emit only the appended part
+            output = rendered[len(self.pre_buf):]
+        else:
+            # different content: push the old output away and print everything
+            output = '\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n' + rendered
+        print(output, end='')
+        self.pre_buf = rendered
+    
+cli_printer = CLI_Printer()
+# ==============================================================================================================================
 def test_解析一个Python项目():
    from crazy_functions.解析项目源代码 import 解析一个Python项目
    txt = "crazy_functions/test_project/python/dqn"
@ -116,6 +160,52 @@ def test_Markdown多语言():
        for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
            print(cb)

+def test_Langchain知识库():
+    # Manual test: build a knowledge archive from "./" with 知识库问答, then ask one
+    # question through 读取知识库作答. Both plugins run with their own stdout silenced.
+    from crazy_functions.Langchain知识库 import 知识库问答
+    txt = "./"
+    chatbot = ChatBotWithCookies(llm_kwargs)
+    for cookies, cb, hist, msg in silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)   #  print(cb)
+    
+    # Continue with the cookies yielded by the last UI update of the first plugin.
+    chatbot = ChatBotWithCookies(cookies)
+    from crazy_functions.Langchain知识库 import 读取知识库作答
+    txt = "What is the installation method？"
+    for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)   #  print(cb)
+
+def test_Langchain知识库读取():
+    # Manual test: ask one question through 读取知识库作答 using the module-level chatbot.
+    from crazy_functions.Langchain知识库 import 读取知识库作答
+    txt = "远程云服务器部署？"
+    for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)   #  print(cb)
+
+def test_Latex():
+    # Manual test of Latex翻译中文并重新编译PDF.
+    from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF
+    # Candidate inputs kept for quick switching; only the last uncommented
+    # assignment to txt takes effect.
+    txt = "C:/Users/fuqingxu/Desktop/proofread"
+    txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/paperx"
+    txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery"
+    txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06"
+    txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20"
+    txt = r"https://arxiv.org/abs/1902.03185"
+    txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
+    txt = r"https://arxiv.org/abs/2305.18290"
+    txt = r"https://arxiv.org/abs/2305.17608"
+    # txt = r"https://arxiv.org/abs/2306.00324"
+    txt = r"https://arxiv.org/abs/2211.16068"
+    
+    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+        cli_printer.print(cb)   #  print(cb)
+
+
+
+    # txt = "2302.02948.tar"
+    # print(txt)
+    # main_tex, work_folder = Latex预处理(txt)
+    # print('main tex:', main_tex)
+    # res = 编译Latex(main_tex, work_folder)
+    # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    #     cli_printer.print(cb)   #  print(cb)
+


 # test_解析一个Python项目()
@ -129,7 +219,8 @@ def test_Markdown多语言():
 # test_联网回答问题()
 # test_解析ipynb文件()
 # test_数学动画生成manim()
-test_Markdown多语言()
-
+# test_Langchain知识库()
+# test_Langchain知识库读取()
+test_Latex()
 input("程序完成，回车退出。")
 print("退出。")
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@ -1,4 +1,5 @@
 from toolbox import update_ui, get_conf, trimmed_format_exc
+import threading

 def input_clipping(inputs, history, max_token_limit):
    import numpy as np
@ -606,3 +607,94 @@ def get_files_from_everything(txt, type): # type='.md'
        success = False

    return success, file_manifest, project_folder
+
+
+
+
+def Singleton(cls):
+    """
+    Class decorator: the decorated name becomes a factory that builds `cls`
+    once (with the arguments of the first call) and returns that same object
+    on every later call, ignoring later arguments.
+    """
+    created = {}
+
+    def _singleton(*args, **kargs):
+        if cls in created:
+            return created[cls]
+        instance = cls(*args, **kargs)
+        created[cls] = instance
+        return instance
+
+    return _singleton
+
+
+@Singleton
+class knowledge_archive_interface():
+    """
+    Process-wide (see the Singleton decorator) access point to a zh_langchain vector store.
+
+    One archive is loaded at a time, identified by `current_id`. Building/loading
+    an archive and querying it are serialised with `threadLock`.
+    """
+    def __init__(self) -> None:
+        self.threadLock = threading.Lock()
+        self.current_id = ""                # id of the archive currently loaded
+        self.kai_path = None                # second value returned by construct_vector_store
+        self.qa_handle = None               # first value returned by construct_vector_store
+        self.text2vec_large_chinese = None  # lazily created embedding model
+
+    def get_chinese_text2vec(self):
+        """Return the embedding model, creating it on first use."""
+        if self.text2vec_large_chinese is None:
+            # < ------------------- warm up the text embedding model --------------- >
+            from toolbox import ProxyNetworkActivate
+            print('Checking Text2vec ...')
+            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+            with ProxyNetworkActivate():    # temporarily activate the proxy network
+                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
+
+        return self.text2vec_large_chinese
+
+    def _load_archive(self, archive_id, files):
+        """Build/load the vector store `archive_id` from `files`; the caller must hold the lock."""
+        from zh_langchain import construct_vector_store
+        qa_handle, kai_path = construct_vector_store(
+            vs_id=archive_id,
+            files=files,
+            sentence_size=100,
+            history=[],
+            one_conent="",
+            one_content_segmentation="",
+            text2vec=self.get_chinese_text2vec(),
+        )
+        # Commit the new state only after construction succeeded, so a failure
+        # cannot leave current_id pointing at an archive that was never loaded.
+        self.qa_handle, self.kai_path = qa_handle, kai_path
+        self.current_id = archive_id
+
+    def feed_archive(self, file_manifest, id="default"):
+        """Add the files in `file_manifest` to archive `id` and make it the current archive."""
+        # `with` guarantees the lock is released even when construction raises.
+        with self.threadLock:
+            self._load_archive(id, file_manifest)
+
+    def get_current_archive_id(self):
+        """Return the id of the archive currently loaded ("" if none)."""
+        return self.current_id
+
+    def get_loaded_file(self):
+        """Return whatever the current qa handle reports as its loaded files."""
+        return self.qa_handle.get_loaded_file()
+
+    def answer_with_archive_by_id(self, txt, id):
+        """
+        Query archive `id` with the text `txt`.
+
+        The archive is (re)loaded first when it is not the current one or when
+        nothing has been loaded yet. Returns the (resp, prompt) pair produced by
+        the qa handle.
+        """
+        VECTOR_SEARCH_SCORE_THRESHOLD = 0
+        VECTOR_SEARCH_TOP_K = 4
+        CHUNK_SIZE = 512
+        with self.threadLock:
+            if self.qa_handle is None or self.current_id != id:
+                self._load_archive(id, [])
+            resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
+                query=txt,
+                vs_path=self.kai_path,
+                score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
+                vector_search_top_k=VECTOR_SEARCH_TOP_K,
+                chunk_conent=True,
+                chunk_size=CHUNK_SIZE,
+                text2vec=self.get_chinese_text2vec(),
+            )
+        return resp, prompt
+
+def try_install_deps(deps):
+    """Install each requirement in `deps` with `pip install --user`, using the running interpreter."""
+    import subprocess, sys
+    pip_install = [sys.executable, '-m', 'pip', 'install', '--user']
+    for requirement in deps:
+        subprocess.check_call(pip_install + [requirement])
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@ -0,0 +1,606 @@
+from toolbox import update_ui, update_ui_lastest_msg    # 刷新Gradio前端界面
+from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
+import os, shutil
+import re
+pj = os.path.join
+
+def 寻找Latex主文件(file_manifest, mode):
+    """
+    Find the main file of a multi-file Tex project: the first file in
+    `file_manifest` whose content contains \\documentclass.
+    Files whose base name starts with 'merge' are skipped.
+    `mode` is accepted but not used here.
+    P.S. Hopefully nobody ships a latex template inside the project.
+
+    Raises RuntimeError when no file qualifies.
+    """
+    for texf in file_manifest:
+        if os.path.basename(texf).startswith('merge'):
+            continue
+        # errors='replace' matches merge_tex_files_: a file that is not valid
+        # UTF-8 must not abort the search with a UnicodeDecodeError.
+        with open(texf, 'r', encoding='utf8', errors='replace') as f:
+            file_content = f.read()
+        if r'\documentclass' in file_content:
+            return texf
+    raise RuntimeError('无法找到一个主Tex文件（包含documentclass关键字）')
+
+def merge_tex_files_(project_foler, main_file, mode):
+    """
+    Recursively inline every \\input{...} of `main_file` (a string of Tex source),
+    resolving paths relative to `project_foler`, and return the merged text.
+    """
+    hits = list(re.finditer(r"\\input\{(.*?)\}", main_file, re.M))
+    # Work from the last match to the first so earlier spans stay valid.
+    for hit in hits[::-1]:
+        target = os.path.join(project_foler, hit.group(1))
+        # e.g., \input{srcs/07_appendix.tex} is used as is,
+        # while \input{srcs/07_appendix} gets the '.tex' suffix appended
+        source_path = target if os.path.exists(target) else target + '.tex'
+        with open(source_path, 'r', encoding='utf-8', errors='replace') as fx:
+            included = fx.read()
+        included = merge_tex_files_(project_foler, included, mode)
+        start, end = hit.span()
+        main_file = main_file[:start] + included + main_file[end:]
+    return main_file
+
+def merge_tex_files(project_foler, main_file, mode):
+    """
+    Merge a multi-file Tex project into a single Tex document (outer layer of the recursion).
+    P.S. In 'translate_zh' mode the CTEX and url packages are inserted right after
+         the \\documentclass line to support Chinese.
+    P.S. Latex comments are removed along the way.
+    """
+    main_file = merge_tex_files_(project_foler, main_file, mode)
+    if mode == 'translate_zh':
+        position = re.compile(r'\\documentclass.*\n').search(main_file).end()
+        head, tail = main_file[:position], main_file[position:]
+        main_file = head + '\\usepackage{CTEX}\n\\usepackage{url}\n' + tail
+
+    def is_comment_line(line):
+        # a line that starts with '%', or with a space followed (after any
+        # further leading whitespace) by '%'
+        return line.startswith("%") or (line.startswith(" ") and line.lstrip().startswith("%"))
+
+    # drop lines that consist of a comment only
+    main_file = '\n'.join(line for line in main_file.splitlines() if not is_comment_line(line))
+    # then strip trailing comments: an unescaped '%' and everything after it on the line
+    main_file = re.sub(r'(?<!\\)%.*', '', main_file)
+    return main_file
+
+
+class LinkedListNode():
+    """
+    One node of a singly linked list: a piece of text, a `preserve` flag and a
+    reference to the following node.
+    """
+    def __init__(self, string, preserve=True) -> None:
+        self.next = None            # following node, None at the end of the list
+        self.preserve = preserve
+        self.string = string
+
+
+def mod_inbraket(match):
+    """
+    re.sub callback for a `\\cmd{argument}` match (group 1 = cmd, group 2 = argument):
+    full-width colons and commas inside the argument are converted back to
+    their ASCII forms, since GPT tends to replace them (e.g. inside \\cite).
+    """
+    cmd, argument = match.group(1), match.group(2)
+    # full-width colon -> ':' and full-width comma -> ','
+    for wide, ascii_char in (('：', ':'), ('，', ',')):
+        argument = argument.replace(wide, ascii_char)
+    return ''.join(("\\", cmd, "{", argument, "}"))
+
+def fix_content(final_tex, node_string):
+    """
+    Fix common GPT errors to increase success rate
+
+    Args:
+    - final_tex: the text returned by GPT for one fragment.
+    - node_string: the original text of that fragment.
+
+    Returns the repaired text, or `node_string` itself when the original has
+    unbalanced braces and the brace counts of the result differ from it.
+    """
+    # Escape every '%' that is not already escaped, exactly once. The previous
+    # two-step replace produced '\\%', which Latex reads as a line break
+    # followed by a comment that swallows the rest of the line.
+    final_tex = re.sub(r"(?<!\\)%", r"\\%", final_tex)
+    # '\cmd {' -> '\cmd{'
+    final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
+    # '\ cmd{' -> '\cmd{'
+    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
+    # undo full-width punctuation inside '\cmd{...}'
+    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
+    if node_string.count('{') != node_string.count('}'):
+        braces_changed = (final_tex.count('{') != node_string.count('{')
+                          or final_tex.count('}') != node_string.count('}'))
+        if braces_changed:
+            final_tex = node_string # something went wrong, restore the original text
+
+    return final_tex
+
+class LatexPaperSplit():
+    """
+    将Latex文档分解到一个链表中，每个链表节点用preserve的标志位提示它是否应当被GPT处理
+    """
+    def __init__(self) -> None:
+        """
+        root is the root (first) node of the linked list
+        """
+        self.root = None
+
+    def merge_result(self, arr, mode, msg):
+        """
+        Merge the GPT-processed fragments back into one document.
+
+        Walks the linked list starting at self.root: preserved nodes contribute
+        their text unchanged, every other node contributes the next element of
+        `arr` after it went through fix_content.
+
+        Args:
+        - arr: processed texts, one per non-preserved node, in list order.
+        - mode: when 'translate_zh', a warning notice is inserted after the
+          \\begin{abstract} line (if the document has one).
+        - msg: string embedded in that warning notice.
+
+        Returns the merged document as a string ("" when the list is empty).
+        """
+        pieces = []
+        node = self.root
+        p = 0
+        while node is not None:
+            if node.preserve:
+                pieces.append(node.string)
+            else:
+                pieces.append(fix_content(arr[p], node.string))
+                p += 1
+            node = node.next
+        result_string = "".join(pieces)
+        if mode == 'translate_zh':
+            match = re.search(r'\\begin\{abstract\}.*\n', result_string)
+            # No abstract environment: leave the document without the notice.
+            if match is not None:
+                position = match.end()
+                result_string = result_string[:position] + \
+                    "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，其内容可靠性没有任何保障，请仔细鉴别并以原文为准。" + \
+                    "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"            + \
+                    msg + \
+                    "为了防止大语言模型的意外谬误产生扩散影响，禁止移除或修改此警告。}}\\\\"    + \
+                    result_string[position:]
+        return result_string
+
+    def split(self, txt, project_folder):
+        """
+        将Latex文档分解到一个链表中，每个链表节点用preserve的标志位提示它是否应当被GPT处理
+        """
+        root = LinkedListNode(txt, False)
+        def split_worker(root, pattern, flags=0):
+            lt = root
+            cnt = 0
+            pattern_compile = re.compile(pattern, flags)
+            while True:
+                if not lt.preserve:
+                    while True:
+                        res = pattern_compile.search(lt.string)
+                        if not res: break
+                        before = res.string[:res.span()[0]]
+                        this = res.group(0)
+                        after = res.string[res.span()[1]:]
+                        # ======
+                        lt.string = before
+                        tmp  = lt.next
+                        # ======
+                        mid = LinkedListNode(this, True)
+                        lt.next = mid
+                        # ======
+                        aft = LinkedListNode(after, False)
+                        mid.next = aft
+                        aft.next = tmp
+                        # ======
+                        lt = aft
+                lt = lt.next
+                cnt += 1
+                # print(cnt)
+                if lt is None: break
+
+        def split_worker_begin_end(root, pattern, flags=0, limit_n_lines=25):
+            lt = root
+            cnt = 0
+            pattern_compile = re.compile(pattern, flags)
+            while True:
+                if not lt.preserve:
+                    while True:
+                        target_string = lt.string
+
+                        def search_with_line_limit(target_string):
+                            for res in pattern_compile.finditer(target_string):
+                                cmd = res.group(1) # begin{what}
+                                this = res.group(2) # content between begin and end
+                                white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf']
+                                if cmd in white_list or this.count('\n') > 25:
+                                    sub_res = search_with_line_limit(this)
+                                    if not sub_res: continue
+                                    else: return sub_res
+                                else:
+                                    return res.group(0)
+                            return False
+                        # ======
+                        # search for first encounter of \begin \end pair with less than 25 lines in the middle
+                        ps = search_with_line_limit(target_string) 
+                        if not ps: break
+                        res = re.search(re.escape(ps), target_string, flags)
+                        if not res: assert False
+                        before = res.string[:res.span()[0]]
+                        this = res.group(0)
+                        after = res.string[res.span()[1]:]
+                        # ======
+                        lt.string = before
+                        tmp  = lt.next
+                        # ======
+                        mid = LinkedListNode(this, True)
+                        lt.next = mid
+                        # ======
+                        aft = LinkedListNode(after, False)
+                        mid.next = aft
+                        aft.next = tmp
+                        # ======
+                        lt = aft
+                lt = lt.next
+                cnt += 1
+                # print(cnt)
+                if lt is None: break
+
+
+        # root 是链表的头
+        print('正在分解Latex源文件，构建链表结构')
+        # 删除iffalse注释
+        split_worker(root, r"\\iffalse(.*?)\\fi", re.DOTALL)
+        # 吸收在25行以内的begin-end组合
+        split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
+        # 吸收匿名公式
+        split_worker(root, r"\$\$(.*?)\$\$", re.DOTALL)
+        # 吸收其他杂项
+        split_worker(root, r"(.*?)\\maketitle", re.DOTALL)
+        split_worker(root, r"\\section\{(.*?)\}")
+        split_worker(root, r"\\section\*\{(.*?)\}")
+        split_worker(root, r"\\subsection\{(.*?)\}")
+        split_worker(root, r"\\subsubsection\{(.*?)\}")
+        split_worker(root, r"\\bibliography\{(.*?)\}")
+        split_worker(root, r"\\bibliographystyle\{(.*?)\}")
+        split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
+        split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
+        split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
+        split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
+        split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
+        split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
+        split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
+        split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
+        split_worker(root, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
+        split_worker(root, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
+        split_worker(root, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
+        split_worker(root, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+        split_worker(root, r"\\item ")
+        split_worker(root, r"\\label\{(.*?)\}")
+        split_worker(root, r"\\begin\{(.*?)\}")
+        split_worker(root, r"\\vspace\{(.*?)\}")
+        split_worker(root, r"\\hspace\{(.*?)\}")
+        split_worker(root, r"\\end\{(.*?)\}")
+
+        node = root
+        while True:
+            if len(node.string.strip('\n').strip(''))==0: node.preserve = True
+            if len(node.string.strip('\n').strip(''))<50: node.preserve = True
+            node = node.next
+            if node is None: break
+
+        # 修复括号
+        node = root
+        while True:
+            string = node.string
+            if node.preserve: 
+                node = node.next
+                if node is None: break
+                continue
+            def break_check(string):
+                str_stack = [""] # (lv, index)
+                for i, c in enumerate(string):
+                    if c == '{':
+                        str_stack.append('{')
+                    elif c == '}':
+                        if len(str_stack) == 1:
+                            print('stack kill')
+                            return i
+                        str_stack.pop(-1)
+                    else:
+                        str_stack[-1] += c
+                return -1
+            bp = break_check(string)
+
+            if bp == -1:
+                pass
+            elif bp == 0:
+                node.string = string[:1]
+                q = LinkedListNode(string[1:], False)
+                q.next = node.next
+                node.next = q
+            else:
+                node.string = string[:bp]
+                q = LinkedListNode(string[bp:], False)
+                q.next = node.next
+                node.next = q
+
+            node = node.next
+            if node is None: break
+
+        node = root
+        while True:
+            if len(node.string.strip('\n').strip(''))==0: node.preserve = True
+            if len(node.string.strip('\n').strip(''))<50: node.preserve = True
+            node = node.next
+            if node is None: break
+
+        # 将前后断行符脱离
+        node = root
+        prev_node = None
+        while True:
+            if not node.preserve:
+                lstriped_ = node.string.lstrip().lstrip('\n')
+                if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
+                    prev_node.string += node.string[:-len(lstriped_)]
+                    node.string = lstriped_
+                rstriped_ = node.string.rstrip().rstrip('\n')
+                if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
+                    node.next.string = node.string[len(rstriped_):] + node.next.string
+                    node.string = rstriped_
+            # =====
+            prev_node = node
+            node = node.next
+            if node is None: break
+
+        # 将分解结果返回 res_to_t
+        with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
+            res_to_t = []
+            node = root
+            while True:
+                show_html = node.string.replace('\n','<br/>')
+                if not node.preserve:
+                    res_to_t.append(node.string)
+                    f.write(f'<p style="color:black;">#{show_html}#</p>')
+                else:
+                    f.write(f'<p style="color:red;">{show_html}</p>')
+                node = node.next
+                if node is None: break
+
+        self.root = root
+        self.sp = res_to_t
+        return self.sp
+
+class LatexPaperFileGroup():
+    def __init__(self):
+        self.file_paths = []
+        self.file_contents = []
+        self.sp_file_contents = []
+        self.sp_file_index = []
+        self.sp_file_tag = []
+
+        # count_token
+        from request_llm.bridge_all import model_info
+        enc = model_info["gpt-3.5-turbo"]['tokenizer']
+        def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
+        self.get_token_num = get_token_num
+
+    def run_file_split(self, max_token_limit=1900):
+        """
+        将长文本分离开来
+        """
+        for index, file_content in enumerate(self.file_contents):
+            if self.get_token_num(file_content) < max_token_limit:
+                self.sp_file_contents.append(file_content)
+                self.sp_file_index.append(index)
+                self.sp_file_tag.append(self.file_paths[index])
+            else:
+                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
+                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
+                for j, segment in enumerate(segments):
+                    self.sp_file_contents.append(segment)
+                    self.sp_file_index.append(index)
+                    self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
+        print('Segmentation: done')
+
+    def merge_result(self):
+        self.file_result = ["" for _ in range(len(self.file_paths))]
+        for r, k in zip(self.sp_file_result, self.sp_file_index):
+            self.file_result[k] += r
+
+    def write_result(self):
+        manifest = []
+        for path, res in zip(self.file_paths, self.file_result):
+            with open(path + '.polish.tex', 'w', encoding='utf8') as f:
+                manifest.append(path + '.polish.tex')
+                f.write(res)
+        return manifest
+
+
+
+def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None):
+    import time, os, re
+    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+    from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
+
+    #  <-------- 寻找主tex文件 ----------> 
+    maintex = 寻找Latex主文件(file_manifest, mode)
+    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果：该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+    time.sleep(5)
+
+    #  <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ----------> 
+    main_tex_basename = os.path.basename(maintex)
+    assert main_tex_basename.endswith('.tex')
+    main_tex_basename_bare = main_tex_basename[:-4]
+    may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
+    if os.path.exists(may_exist_bbl):
+        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
+        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
+        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
+
+    with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
+        content = f.read()
+        merged_content = merge_tex_files(project_folder, content, mode)
+
+    with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
+        f.write(merged_content)
+
+    #  <-------- 精细切分latex文件 ----------> 
+    lps = LatexPaperSplit()
+    res = lps.split(merged_content, project_folder)
+
+    #  <-------- 拆分过长的latex片段 ----------> 
+    pfg = LatexPaperFileGroup()
+    for index, r in enumerate(res):
+        pfg.file_paths.append('segment-' + str(index))
+        pfg.file_contents.append(r)
+
+    pfg.run_file_split(max_token_limit=1024)
+    n_split = len(pfg.sp_file_contents)
+
+    #  <-------- 根据需要切换prompt ----------> 
+    inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
+    inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]
+
+    if os.path.exists(pj(project_folder,'temp.pkl')):
+
+        #  <-------- 【仅调试】如果存在调试缓存文件，则跳过GPT请求环节 ----------> 
+        pfg = objload(file=pj(project_folder,'temp.pkl'))
+
+    else:
+        #  <-------- gpt 多线程请求 ----------> 
+        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+            inputs_array=inputs_array,
+            inputs_show_user_array=inputs_show_user_array,
+            llm_kwargs=llm_kwargs,
+            chatbot=chatbot,
+            history_array=[[""] for _ in range(n_split)],
+            sys_prompt_array=sys_prompt_array,
+            # max_workers=5,  # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
+            scroller_max_len = 40
+        )
+
+        #  <-------- 文本碎片重组为完整的tex片段 ----------> 
+        pfg.sp_file_result = []
+        for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
+            pfg.sp_file_result.append(gpt_say)
+        pfg.merge_result()
+
+        # <-------- 临时存储用于调试 ----------> 
+        pfg.get_token_num = None
+        objdump(pfg, file=pj(project_folder,'temp.pkl'))
+
+
+    #  <-------- 写出文件 ----------> 
+    msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
+    final_tex = lps.merge_result(pfg.file_result, mode, msg)
+    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
+        f.write(final_tex)
+
+    #  <-------- 整理结果, 退出 ----------> 
+    chatbot.append((f"完成了吗？", 'GPT结果已输出, 正在编译PDF'))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+
+    #  <-------- 返回 ----------> 
+    return project_folder + f'/merge_{mode}.tex'
+
+
+
+def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
+    try:
+        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
+            log = f.read()
+        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
+            file_lines = f.readlines()
+        import re
+        buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
+        buggy_lines = [int(l) for l in buggy_lines]
+        buggy_lines = sorted(buggy_lines)
+        print("removing lines that has errors", buggy_lines)
+        file_lines.pop(buggy_lines[0]-1)
+        with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
+            f.writelines(file_lines)
+        return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
+    except:
+        return False, 0, [0]
+    
+
+def compile_latex_with_timeout(command, timeout=60):
+    import subprocess
+    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    try:
+        stdout, stderr = process.communicate(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        process.kill()
+        stdout, stderr = process.communicate()
+        print("Process timed out!")
+        return False
+    return True
+
+def 编译Latex差别(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
+    import os, time
+    current_dir = os.getcwd()
+    n_fix = 1
+    max_try = 32
+    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}，如果程序停顿5分钟以上，则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果，或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
+    chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
+    yield from update_ui_lastest_msg('编译已经开始...', chatbot, history)   # 刷新Gradio前端界面
+
+    while True:
+        import os
+        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
+        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
+        os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
+
+        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history)   # 刷新Gradio前端界面
+        os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+        
+        if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
+            # 只有第二步成功，才能继续下面的步骤
+            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history)    # 刷新Gradio前端界面
+            if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
+                os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex  {main_file_original}.aux'); os.chdir(current_dir)
+            if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
+                os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex  {main_file_modified}.aux'); os.chdir(current_dir)
+
+            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history)  # 刷新Gradio前端界面
+            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
+            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
+            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+
+            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
+            print(    f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+            ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+
+            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
+            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+            os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
+            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+
+        # <--------------------->
+        os.chdir(current_dir)
+
+        # <---------- 检查结果 ----------->
+        results_ = ""
+        original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
+        modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
+        diff_pdf_success     = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
+        results_ += f"原始PDF编译是否成功: {original_pdf_success};" 
+        results_ += f"转化PDF编译是否成功: {modified_pdf_success};" 
+        results_ += f"对比PDF编译是否成功: {diff_pdf_success};" 
+        yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
+
+        if modified_pdf_success:
+            yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history)    # 刷新Gradio前端界面
+            os.chdir(current_dir)
+            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
+            if os.path.exists(pj(work_folder, '..', 'translation')):
+                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
+            promote_file_to_downloadzone(result_pdf)
+            return True # 成功啦
+        else:
+            if n_fix>=max_try: break
+            n_fix += 1
+            can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
+                file_path=pj(work_folder_modified, f'{main_file_modified}.tex'), 
+                log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
+                tex_name=f'{main_file_modified}.tex',
+                tex_name_pure=f'{main_file_modified}',
+                n_fix=n_fix,
+                work_folder_modified=work_folder_modified,
+            )
+            yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history)   # 刷新Gradio前端界面
+            if not can_retry: break
+
+    os.chdir(current_dir)
+    return False # 失败啦
+
+
+