update

2023-06-21 17:58:13 +08:00
parent b24e664a85
commit 78df094eb9
13 changed files with 696 additions and 153 deletions
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@ -1,13 +1,13 @@
 from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
 from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
+from functools import partial
 import glob, os, requests, time
 pj = os.path.join
-# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
-# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
-ARXIV_CACHE_DIR = os.getenv("Arxiv_Cache")
+ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
+
 # =================================== 工具函数 ===============================================
 专业词汇声明  = 'If the term "agent" is used in this section, it should be translated to "智能体". '
-def switch_prompt(pfg, mode):
+def switch_prompt(pfg, mode, more_requirement):
    """
    Generate prompts and system prompts based on the mode for proofreading or translating.
    Args:
@ -26,7 +26,7 @@ def switch_prompt(pfg, mode):
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    elif mode == 'translate_zh':
-        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + 
+        inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement + 
                        r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + 
                        r"Answer me only with the translated text:" + 
                        f"\n\n{frag}" for frag in pfg.sp_file_contents]
@ -80,7 +80,7 @@ def arxiv_download(chatbot, history, txt):
            os.makedirs(translation_dir)
        target_file = pj(translation_dir, 'translate_zh.pdf')
        if os.path.exists(target_file):
-            promote_file_to_downloadzone(target_file)
+            promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
            return target_file
        return False
    def is_float(s):
@ -89,8 +89,10 @@ def arxiv_download(chatbot, history, txt):
            return True
        except ValueError:
            return False
-    if ('.' in txt) and ('/' not in txt) and is_float(txt):
-        txt = 'https://arxiv.org/abs/' + txt
+    if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
+        txt = 'https://arxiv.org/abs/' + txt.strip()
+    if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
+        txt = 'https://arxiv.org/abs/' + txt[:10]
    if not txt.startswith('https://arxiv.org'): 
        return txt, None
    
@ -106,6 +108,7 @@ def arxiv_download(chatbot, history, txt):
        return msg, None
    # <-------------- set format ------------->
    arxiv_id = url_.split('/abs/')[-1]
+    if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
    cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
    if cached_translation_pdf: return cached_translation_pdf, arxiv_id

@ -178,7 +181,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_proofread.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
+                                chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)


    # <-------------- compile PDF ------------->
@ -187,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
    

    # <-------------- zip PDF ------------->
-    zip_result(project_folder)
+    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)

    # <-------------- we are done ------------->
    return success
@ -209,6 +214,10 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
        "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳，Linux下必须使用Docker安装，详见项目主README.md。目前仅支持GPT3.5/GPT4，其他模型转化效果未知。目前对机器学习类文献转化效果最好，其他类型文献转化效果未知。"])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

+    # <-------------- more requirements ------------->
+    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+    more_req = plugin_kwargs.get("advanced_arg", "")
+    _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)

    # <-------------- check deps ------------->
    try:
@ -256,21 +265,23 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,

    # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
    if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
-        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
+        yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, 
+                                chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)


    # <-------------- compile PDF ------------->
-    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', 
+    success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', 
                             work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)

    # <-------------- zip PDF ------------->
-    zip_result(project_folder)
+    zip_res = zip_result(project_folder)
    if success:
        chatbot.append((f"成功啦", '请查收结果（压缩包）...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
    else:
        chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果（压缩包）, 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
        yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+        promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)


    # <-------------- we are done ------------->
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@ -189,6 +189,7 @@ def test_Latex():
    # txt = r"https://arxiv.org/abs/2211.16068"                     #  ACE
    # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder"  #  ACE
    txt = r"https://arxiv.org/abs/2002.09253"
+    txt = r"https://arxiv.org/abs/2306.07831"
    for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        cli_printer.print(cb)   #  print(cb)

@ -217,6 +218,7 @@ def test_Latex():
 # test_数学动画生成manim()
 # test_Langchain知识库()
 # test_Langchain知识库读取()
-test_Latex()
-input("程序完成，回车退出。")
-print("退出。")
+if __name__ == "__main__":
+    test_Latex()
+    input("程序完成，回车退出。")
+    print("退出。")
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@ -698,3 +698,51 @@ def try_install_deps(deps):
    for dep in deps:
        import subprocess, sys
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
+
+
+class construct_html():
+    def __init__(self) -> None:
+        self.css = """
+.row {
+  display: flex;
+  flex-wrap: wrap;
+}
+
+.column {
+  flex: 1;
+  padding: 10px;
+}
+
+.table-header {
+  font-weight: bold;
+  border-bottom: 1px solid black;
+}
+
+.table-row {
+  border-bottom: 1px solid lightgray;
+}
+
+.table-cell {
+  padding: 5px;
+}
+        """
+        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
+
+
+    def add_row(self, a, b):
+        tmp = """
+<div class="row table-row">
+    <div class="column table-cell">REPLACE_A</div>
+    <div class="column table-cell">REPLACE_B</div>
+</div>
+        """
+        from toolbox import markdown_convertion
+        tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
+        tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
+        self.html_string += tmp
+
+
+    def save_file(self, file_name):
+        with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+            f.write(self.html_string.encode('utf-8', 'ignore').decode())
+
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@ -175,9 +175,8 @@ def merge_tex_files(project_foler, main_file, mode):
        main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
        # fontset=windows
        import platform
-        if platform.system() != 'Windows':
-            main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
-            main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
+        main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
+        main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
        # find paper abstract
        pattern = re.compile(r'\\begin\{abstract\}.*\n')
        match = pattern.search(main_file)
@ -213,6 +212,8 @@ def fix_content(final_tex, node_string):
    final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
    final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)

+    if "Traceback" in final_tex and "[Local Message]" in final_tex:
+        final_tex = node_string # 出问题了，还原原文
    if node_string.count('\\begin') != final_tex.count('\\begin'):
        final_tex = node_string # 出问题了，还原原文
    if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
@ -404,7 +405,7 @@ class LatexPaperSplit():
    def __init__(self) -> None:
        self.nodes = None
        self.msg = "{\\scriptsize\\textbf{警告：该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成，" + \
-            "版权归原文作者所有。翻译内容可靠性无任何保障，请仔细鉴别并以原文为准。" + \
+            "版权归原文作者所有。翻译内容可靠性无保障，请仔细鉴别并以原文为准。" + \
            "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
        # 请您不要删除或修改这行警告，除非您是论文的原作者（如果您是论文原作者，欢迎加REAME中的QQ联系开发者）
        self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响，禁止移除或修改此警告。}}\\\\" 
@ -498,7 +499,32 @@ class LatexPaperFileGroup():
                f.write(res)
        return manifest

+def write_html(sp_file_contents, sp_file_result, chatbot):

+    # write html
+    try:
+        import copy
+        from .crazy_utils import construct_html
+        from toolbox import gen_time_str
+        ch = construct_html() 
+        orig = ""
+        trans = ""
+        final = []
+        for c,r in zip(sp_file_contents, sp_file_result): 
+            final.append(c)
+            final.append(r)
+        for i, k in enumerate(final): 
+            if i%2==0:
+                orig = k
+            if i%2==1:
+                trans = k
+                ch.add_row(a=orig, b=trans)
+        create_report_file_name = f"{gen_time_str()}.trans.html"
+        ch.save_file(create_report_file_name)
+        promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
+    except:
+        from toolbox import trimmed_format_exc
+        print('writing html result failed:', trimmed_format_exc())

 def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
    import time, os, re
@ -575,6 +601,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
        pfg.get_token_num = None
        objdump(pfg, file=pj(project_folder,'temp.pkl'))

+    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)

    #  <-------- 写出文件 ----------> 
    msg = f"当前大语言模型: {llm_kwargs['llm_model']}，当前语言模型温度设定: {llm_kwargs['temperature']}。"
@ -624,7 +651,7 @@ def compile_latex_with_timeout(command, timeout=60):
        return False
    return True

-def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
+def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
    import os, time
    current_dir = os.getcwd()
    n_fix = 1
@ -635,6 +662,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f

    while True:
        import os
+
        # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
        yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history)   # 刷新Gradio前端界面
        os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
@ -656,15 +684,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
            os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
            os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)

-            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
-            print(    f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
-            ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+            if mode!='translate_zh':
+                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
+                print(    f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+                ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex  {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')

-            yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
-            os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history)   # 刷新Gradio前端界面
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex    merge_diff.aux'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+                os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex  -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)

        # <--------------------->
        os.chdir(current_dir)
@ -685,9 +714,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
            result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
            if os.path.exists(pj(work_folder, '..', 'translation')):
                shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
-            promote_file_to_downloadzone(result_pdf, chatbot=chatbot)
-            # 我重新指定了参数传入。
-            # promote_file_to_downloadzone(file=result_pdf, rename_file=None, chatbot=chatbot)
+            promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
            return True # 成功啦
        else:
            if n_fix>=max_try: break
--- a/crazy_functions/对话历史存档.py
+++ b/crazy_functions/对话历史存档.py
@ -1,4 +1,4 @@
-from toolbox import CatchException, update_ui
+from toolbox import CatchException, update_ui, promote_file_to_downloadzone
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 import re

@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
        for h in history:
            f.write("\n>>>" + h)
        f.write('</code>')
-    res = '对话历史写入：' + os.path.abspath(f'./gpt_log/{file_name}')
-    print(res)
-    return res
+    promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
+    return '对话历史写入：' + os.path.abspath(f'./gpt_log/{file_name}')

 def gen_file_preview(file_name):
    try: