From 85b838b302cfd39a4028378befa536aea9d50647 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 4 Jun 2023 23:06:35 +0800 Subject: [PATCH 01/35] add Linux support --- crazy_functions/latex_utils.py | 11 +++++++++-- docs/Dockerfile+NoLocal+Latex | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 docs/Dockerfile+NoLocal+Latex diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 3e128eb..9b8d873 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -50,7 +50,14 @@ def merge_tex_files(project_foler, main_file, mode): pattern = re.compile(r'\\documentclass.*\n') match = pattern.search(main_file) position = match.end() - main_file = main_file[:position] + '\\usepackage{CTEX}\n\\usepackage{url}\n' + main_file[position:] + add_ctex = '\\usepackage{ctex}\n' + add_url = '\\usepackage{url}\n' if '{url}' not in main_file else '' + main_file = main_file[:position] + add_ctex + add_url + main_file[position:] + # 2 fontset=windows + import platform + if platform.system() != 'Windows': + main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file) + main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file) new_file_remove_comment_lines = [] for l in main_file.splitlines(): @@ -135,7 +142,7 @@ class LatexPaperSplit(): match = pattern.search(result_string) position = match.end() result_string = result_string[:position] + \ - "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成,其内容可靠性没有任何保障,请仔细鉴别并以原文为准。" + \ + "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成,版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + \ msg + \ "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + \ diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex new file mode 100644 index 0000000..f93afdb --- /dev/null +++ b/docs/Dockerfile+NoLocal+Latex @@ -0,0 +1,22 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 下载 simhei.ttf, simkai.ttf, simsun.ttc 三种字体置于目录下 +# - 3 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . +# - 4 运行 docker run --rm -it --net=host gpt-academic-nolocal-latex + +FROM fuqingxu/python311_texlive_ctex:latest + +# 指定路径 +WORKDIR /gpt + +# 装载项目文件 +COPY . . 
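A quick illustration of the \documentclass option injection added in this patch. The sample preamble below is invented; the two re.sub calls mirror the ones in merge_tex_files, which append fontset=windows to an existing option list or create one when no options are present. The patch applies them only when platform.system() != 'Windows', where ctex cannot auto-locate the Windows font set and the fonts are expected to ship alongside, as the Dockerfile notes above describe:

    import re

    # Invented sample input; in the plugin this string comes from merge_tex_files.
    main_file = "\\documentclass[11pt]{article}\n\\begin{document}hi\\end{document}"

    # Case 1: \documentclass already has an option list -> append to it.
    main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}",
                       r"\\documentclass[\1,fontset=windows]{\2}", main_file)
    # Case 2: no option list -> create one.
    main_file = re.sub(r"\\documentclass{(.*?)}",
                       r"\\documentclass[fontset=windows]{\1}", main_file)

    print(main_file.splitlines()[0])
    # \documentclass[11pt,fontset=windows]{article}
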
+ +# 安装依赖 +RUN pip3 install -r requirements.txt + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] From 7f5be93c1db085e99d82772527c805a44e9c2e95 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 5 Jun 2023 22:57:39 +0800 Subject: [PATCH 02/35] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=AD=A3=E5=88=99=E5=8C=B9=E9=85=8Dbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 14 +- crazy_functions/crazy_functions_test.py | 10 +- crazy_functions/latex_utils.py | 274 ++++++++++++------------ 3 files changed, 150 insertions(+), 148 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index daac763..4cae80a 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -65,8 +65,10 @@ def move_project(project_folder, arxiv_id=None): new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder') else: new_workfolder = f'gpt_log/{gen_time_str()}' - try: shutil.rmtree(new_workfolder) - except: pass + try: + shutil.rmtree(new_workfolder) + except: + pass shutil.copytree(src=project_folder, dst=new_workfolder) return new_workfolder @@ -134,7 +136,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo try: import glob, os, time os.system(f'pdflatex -version') - from .latex_utils import Latex精细分解与转化, 编译Latex差别 + from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) @@ -172,7 +174,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- compile PDF -------------> - success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) @@ -204,7 +206,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, try: import glob, os, time os.system(f'pdflatex -version') - from .latex_utils import Latex精细分解与转化, 编译Latex差别 + from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) @@ -247,7 +249,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- compile PDF -------------> - success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) # <-------------- zip PDF -------------> diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index d4e3274..21c6713 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -186,12 +186,12 @@ def test_Latex(): txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery" txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06" txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20" - txt = 
r"https://arxiv.org/abs/1902.03185" txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40" - txt = r"https://arxiv.org/abs/2305.18290" - txt = r"https://arxiv.org/abs/2305.17608" - # txt = r"https://arxiv.org/abs/2306.00324" - txt = r"https://arxiv.org/abs/2211.16068" + txt = r"https://arxiv.org/abs/1902.03185" + # txt = r"https://arxiv.org/abs/2305.18290" + # txt = r"https://arxiv.org/abs/2305.17608" + # txt = r"https://arxiv.org/abs/2211.16068" # ACE + # txt = r"C:\Users\fuqingxu\arxiv_cache\2211.16068\workfolder" # ACE for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 9b8d873..4941354 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -2,8 +2,76 @@ from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界 from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone import os, shutil import re +import numpy as np pj = os.path.join +""" +======================================================================== +第一部分 +Latex 文件切分到一个链表中 +======================================================================== +""" +PRESERVE = 0 +TRANSFORM = 1 + +def split_worker(text, mask, pattern, flags=0): + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + mask[res.span()[0]:res.span()[1]] = PRESERVE + return text, mask + +def split_worker_reverse_caption(text, mask, pattern, flags=0): + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + mask[res.regs[1][0]:res.regs[1][1]] = TRANSFORM + return text, mask + +def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=25): + pattern_compile = re.compile(pattern, flags) + def search_with_line_limit(text, mask): + for res in pattern_compile.finditer(text): + cmd = res.group(1) # begin{what} + this = res.group(2) # content between begin and end + this_mask = mask[res.regs[2][0]:res.regs[2][1]] + white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', + 'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate'] + if (cmd in white_list) or this.count('\n') >= 42: # use a magical number 42 + this, this_mask = search_with_line_limit(this, this_mask) + mask[res.regs[2][0]:res.regs[2][1]] = this_mask + else: + mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE + return text, mask + return search_with_line_limit(text, mask) + +class LinkedListNode(): + """ + 链表单元 + """ + def __init__(self, string, preserve=True) -> None: + self.string = string + self.preserve = preserve + self.next = None + self.begin_line = 0 + self.begin_char = 0 + +def convert_to_linklist(text, mask): + root = LinkedListNode("", preserve=True) + current_node = root + for c, m, i in zip(text, mask, range(len(text))): + if (m==PRESERVE and current_node.preserve) \ + or (m==TRANSFORM and not current_node.preserve): + # add + current_node.string += c + else: + current_node.next = LinkedListNode(c, preserve=(m==PRESERVE)) + current_node = current_node.next + return root +""" +======================================================================== +Latex 文件融合 +======================================================================== +""" + def 寻找Latex主文件(file_manifest, mode): """ 在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。 @@ -71,19 +139,15 @@ def merge_tex_files(project_foler, main_file, mode): return main_file -class LinkedListNode(): 
- """ - 链表单元 - """ - def __init__(self, string, preserve=True) -> None: - self.string = string - self.preserve = preserve - self.next = None - +""" +======================================================================== +后处理 +======================================================================== +""" def mod_inbraket(match): """ - 为啥chatgpt会把cite里面的逗号换成中文逗号呀 艹 + 为啥chatgpt会把cite里面的逗号换成中文逗号呀 """ # get the matched string cmd = match.group(1) @@ -98,19 +162,24 @@ def fix_content(final_tex, node_string): """ Fix common GPT errors to increase success rate """ - final_tex = final_tex.replace('%', r'\%') - final_tex = final_tex.replace(r'\%', r'\\%') + final_tex = re.sub(r"(? 0 and node_string.count('\_') > final_tex.count('\_'): + # walk and replace any _ without \ + final_tex = re.sub(r"(? 25: - sub_res = search_with_line_limit(this) - if not sub_res: continue - else: return sub_res - else: - return res.group(0) - return False - # ====== - # search for first encounter of \begin \end pair with less than 25 lines in the middle - ps = search_with_line_limit(target_string) - if not ps: break - res = re.search(re.escape(ps), target_string, flags) - if not res: assert False - before = res.string[:res.span()[0]] - this = res.group(0) - after = res.string[res.span()[1]:] - # ====== - lt.string = before - tmp = lt.next - # ====== - mid = LinkedListNode(this, True) - lt.next = mid - # ====== - aft = LinkedListNode(after, False) - mid.next = aft - aft.next = tmp - # ====== - lt = aft - lt = lt.next - cnt += 1 - # print(cnt) - if lt is None: break - - - # root 是链表的头 - print('正在分解Latex源文件,构建链表结构') + # 吸收title与作者以上的部分 + text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) # 删除iffalse注释 - split_worker(root, r"\\iffalse(.*?)\\fi", re.DOTALL) + text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) # 吸收在25行以内的begin-end组合 - split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) + text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) # 吸收匿名公式 - split_worker(root, r"\$\$(.*?)\$\$", re.DOTALL) + text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) # 吸收其他杂项 - split_worker(root, r"(.*?)\\maketitle", re.DOTALL) - split_worker(root, r"\\section\{(.*?)\}") - split_worker(root, r"\\section\*\{(.*?)\}") - split_worker(root, r"\\subsection\{(.*?)\}") - split_worker(root, r"\\subsubsection\{(.*?)\}") - split_worker(root, r"\\bibliography\{(.*?)\}") - split_worker(root, r"\\bibliographystyle\{(.*?)\}") - split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) - split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) - split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) - split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) - split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) - split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) - split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) - split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) - split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) - split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) - split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) - split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) - split_worker(root, 
r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) - split_worker(root, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) - split_worker(root, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) - split_worker(root, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) - split_worker(root, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) - split_worker(root, r"\\item ") - split_worker(root, r"\\label\{(.*?)\}") - split_worker(root, r"\\begin\{(.*?)\}") - split_worker(root, r"\\vspace\{(.*?)\}") - split_worker(root, r"\\hspace\{(.*?)\}") - split_worker(root, r"\\end\{(.*?)\}") - - node = root - while True: - if len(node.string.strip('\n').strip(''))==0: node.preserve = True - if len(node.string.strip('\n').strip(''))<50: node.preserve = True - node = node.next - if node is None: break - + text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\item ") + text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") + # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) + root = convert_to_linklist(text, mask) + # 将分解结果返回 res_to_t + with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: + res_to_t = [] + node = root + while True: + show_html = 
node.string.replace('\n','<br/>')
+                if not node.preserve:
+                    res_to_t.append(node.string)
+                    f.write(f'<p style="color:red;">#{show_html}#</p>')
+                else:
+                    f.write(f'<p style="color:gray;">{show_html}</p>
') + node = node.next + if node is None: break # 修复括号 node = root while True: @@ -295,7 +295,7 @@ class LatexPaperSplit(): str_stack.append('{') elif c == '}': if len(str_stack) == 1: - print('stack kill') + print('stack fix') return i str_stack.pop(-1) else: @@ -322,7 +322,7 @@ class LatexPaperSplit(): node = root while True: if len(node.string.strip('\n').strip(''))==0: node.preserve = True - if len(node.string.strip('\n').strip(''))<50: node.preserve = True + if len(node.string.strip('\n').strip(''))<42: node.preserve = True node = node.next if node is None: break @@ -418,7 +418,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin maintex = 寻找Latex主文件(file_manifest, mode) chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。')) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - time.sleep(5) + time.sleep(3) # <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ----------> main_tex_basename = os.path.basename(maintex) @@ -529,7 +529,7 @@ def compile_latex_with_timeout(command, timeout=60): return False return True -def 编译Latex差别(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder): +def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder): import os, time current_dir = os.getcwd() n_fix = 1 From cea2144f347b5d663968abfa5470769b00df8699 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 5 Jun 2023 23:11:21 +0800 Subject: [PATCH 03/35] fix test samples --- crazy_functions/crazy_functions_test.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 21c6713..d19d653 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -181,13 +181,9 @@ def test_Langchain知识库读取(): def test_Latex(): from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF - txt = "C:/Users/fuqingxu/Desktop/proofread" - txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/paperx" - txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery" - txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06" - txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20" - txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40" - txt = r"https://arxiv.org/abs/1902.03185" + + txt = r"https://arxiv.org/abs/1706.03762" + # txt = r"https://arxiv.org/abs/1902.03185" # txt = r"https://arxiv.org/abs/2305.18290" # txt = r"https://arxiv.org/abs/2305.17608" # txt = r"https://arxiv.org/abs/2211.16068" # ACE From 66018840dae594234a8524d9737a07336b9fb8f7 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 5 Jun 2023 23:24:41 +0800 Subject: [PATCH 04/35] declare resp --- crazy_functions/latex_utils.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 4941354..99672e8 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -189,6 +189,10 @@ class LatexPaperSplit(): root是链表的根节点 """ self.root = None + self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ + "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ + "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" def 
merge_result(self, arr, mode, msg): """ @@ -206,18 +210,10 @@ class LatexPaperSplit(): node = node.next if node is None: break if mode == 'translate_zh': - try: - pattern = re.compile(r'\\begin\{abstract\}.*\n') - match = pattern.search(result_string) - position = match.end() - result_string = result_string[:position] + \ - "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成,版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ - "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + \ - msg + \ - "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + \ - result_string[position:] - except: - pass + pattern = re.compile(r'\\begin\{abstract\}.*\n') + match = pattern.search(result_string) + position = match.end() + result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string def split(self, txt, project_folder): @@ -487,7 +483,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" final_tex = lps.merge_result(pfg.file_result, mode, msg) with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f: - f.write(final_tex) + if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex) + # <-------- 整理结果, 退出 ----------> chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF')) From a26b2948172ca5023f0cee06bd02ff861fded269 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 5 Jun 2023 23:44:59 +0800 Subject: [PATCH 05/35] Write Some Docstring --- crazy_functions/latex_utils.py | 66 ++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 99672e8..b490b5c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -7,26 +7,36 @@ pj = os.path.join """ ======================================================================== -第一部分 -Latex 文件切分到一个链表中 +Part One +Latex segmentation to a linklist ======================================================================== """ PRESERVE = 0 TRANSFORM = 1 def split_worker(text, mask, pattern, flags=0): + """ + Add a preserve text area in this paper + """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask def split_worker_reverse_caption(text, mask, pattern, flags=0): + """ + Move caption area out of preserve area + """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.regs[1][0]:res.regs[1][1]] = TRANSFORM return text, mask -def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=25): +def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): + """ + Find all \begin{} ... \end{} text block that with less than limit_n_lines lines. 
+ Add it to preserve area + """ pattern_compile = re.compile(pattern, flags) def search_with_line_limit(text, mask): for res in pattern_compile.finditer(text): @@ -35,7 +45,7 @@ def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=25): this_mask = mask[res.regs[2][0]:res.regs[2][1]] white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate'] - if (cmd in white_list) or this.count('\n') >= 42: # use a magical number 42 + if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42 this, this_mask = search_with_line_limit(this, this_mask) mask[res.regs[2][0]:res.regs[2][1]] = this_mask else: @@ -45,7 +55,7 @@ def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=25): class LinkedListNode(): """ - 链表单元 + Linked List Node """ def __init__(self, string, preserve=True) -> None: self.string = string @@ -68,7 +78,7 @@ def convert_to_linklist(text, mask): return root """ ======================================================================== -Latex 文件融合 +Latex Merge File ======================================================================== """ @@ -90,7 +100,7 @@ def 寻找Latex主文件(file_manifest, mode): def merge_tex_files_(project_foler, main_file, mode): """ - 递归地把多Tex工程整合为一个Tex文档 + Merge Tex project recrusively """ for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]): f = s.group(1) @@ -109,7 +119,7 @@ def merge_tex_files_(project_foler, main_file, mode): def merge_tex_files(project_foler, main_file, mode): """ - 递归地把多Tex工程整合为一个Tex文档(递归外层) + Merge Tex project recrusively P.S. 顺便把CTEX塞进去以支持中文 P.S. 顺便把Latex的注释去除 """ @@ -142,7 +152,7 @@ def merge_tex_files(project_foler, main_file, mode): """ ======================================================================== -后处理 +Post process ======================================================================== """ def mod_inbraket(match): @@ -182,7 +192,9 @@ def fix_content(final_tex, node_string): class LatexPaperSplit(): """ - 将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理 + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. """ def __init__(self) -> None: """ @@ -192,11 +204,12 @@ class LatexPaperSplit(): self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" def merge_result(self, arr, mode, msg): """ - 将GPT处理后的结果融合 + Merge the result after the GPT process completed """ result_string = "" node = self.root @@ -218,7 +231,9 @@ class LatexPaperSplit(): def split(self, txt, project_folder): """ - 将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理 + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. """ text = txt mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM @@ -263,19 +278,7 @@ class LatexPaperSplit(): text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) root = convert_to_linklist(text, mask) - # 将分解结果返回 res_to_t - with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: - res_to_t = [] - node = root - while True: - show_html = node.string.replace('\n','
<br/>')
-                if not node.preserve:
-                    res_to_t.append(node.string)
-                    f.write(f'<p style="color:red;">#{show_html}#</p>')
-                else:
-                    f.write(f'<p style="color:gray;">{show_html}</p>
') - node = node.next - if node is None: break + # 修复括号 node = root while True: @@ -340,25 +343,26 @@ class LatexPaperSplit(): node = node.next if node is None: break - # 将分解结果返回 res_to_t with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: - res_to_t = [] + segment_parts_for_gpt = [] node = root while True: show_html = node.string.replace('\n','
<br/>')
                 if not node.preserve:
-                    res_to_t.append(node.string)
+                    segment_parts_for_gpt.append(node.string)
                     f.write(f'<p style="color:red;">#{show_html}#</p>')
                 else:
                     f.write(f'<p style="color:gray;">{show_html}</p>
') node = node.next if node is None: break - self.root = root - self.sp = res_to_t + self.sp = segment_parts_for_gpt return self.sp class LatexPaperFileGroup(): + """ + use tokenizer to break down text according to max_token_limit + """ def __init__(self): self.file_paths = [] self.file_contents = [] @@ -374,7 +378,7 @@ class LatexPaperFileGroup(): def run_file_split(self, max_token_limit=1900): """ - 将长文本分离开来 + use tokenizer to break down text according to max_token_limit """ for index, file_content in enumerate(self.file_contents): if self.get_token_num(file_content) < max_token_limit: From f23b66dec25b37565dd4b035b413b4c0255b17da Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Mon, 5 Jun 2023 23:49:54 +0800 Subject: [PATCH 06/35] update Dockerfile with Latex --- docs/Dockerfile+NoLocal+Latex | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex index f93afdb..428dbc0 100644 --- a/docs/Dockerfile+NoLocal+Latex +++ b/docs/Dockerfile+NoLocal+Latex @@ -1,8 +1,7 @@ # 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM # - 1 修改 `config.py` -# - 2 下载 simhei.ttf, simkai.ttf, simsun.ttc 三种字体置于目录下 -# - 3 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . -# - 4 运行 docker run --rm -it --net=host gpt-academic-nolocal-latex +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . +# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex FROM fuqingxu/python311_texlive_ctex:latest @@ -12,8 +11,23 @@ WORKDIR /gpt # 装载项目文件 COPY . . +ARG useProxyNetwork='' + + +# # # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除 +# RUN apt-get update +# RUN apt-get install -y curl proxychains +# RUN $useProxyNetwork curl cip.cc +# RUN sed -i '$ d' /etc/proxychains.conf +# RUN sed -i '$ d' /etc/proxychains.conf +# RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf +# ARG useProxyNetwork=proxychains +# # # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除 + + + # 安装依赖 -RUN pip3 install -r requirements.txt +RUN $useProxyNetwork pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From ddeaf764223ce288419af6a354a96c6cd4ac9989 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 6 Jun 2023 00:23:00 +0800 Subject: [PATCH 07/35] check latex in PATH --- crazy_functions/Latex输出PDF结果.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 4cae80a..ecba82b 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -134,8 +134,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- check deps -------------> try: - import glob, os, time - os.system(f'pdflatex -version') + import glob, os, time, subprocess + subprocess.Popen(['pdflatex', '-version']) from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", @@ -204,8 +204,8 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- check deps -------------> try: - import glob, os, time - os.system(f'pdflatex -version') + import glob, os, time, subprocess + subprocess.Popen(['pdflatex', '-version']) from .latex_utils import Latex精细分解与转化, 编译Latex 
except Exception as e: chatbot.append([ f"解析项目: {txt}", From c65def90f3bcc152113580488a176e2f232fa140 Mon Sep 17 00:00:00 2001 From: MengDanzz <95761983+MengDanzz@users.noreply.github.com> Date: Tue, 6 Jun 2023 14:36:30 +0800 Subject: [PATCH 08/35] =?UTF-8?q?=E5=B0=86Dockerfile=20COPY=E5=88=86?= =?UTF-8?q?=E6=88=90=E4=B8=A4=E6=AE=B5=EF=BC=8C=E7=BC=93=E5=AD=98=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E5=BA=93=EF=BC=8C=E9=87=8D=E6=96=B0=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E4=B8=8D=E9=9C=80=E8=A6=81=E9=87=8D=E6=96=B0=E5=AE=89=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 19d988f..aa4eee8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,12 +10,14 @@ RUN echo '[global]' > /etc/pip.conf && \ WORKDIR /gpt -# 装载项目文件 -COPY . . + + # 安装依赖 +COPY requirements.txt ./ RUN pip3 install -r requirements.txt - +# 装载项目文件 +COPY . . # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From 40da1b0afefd42bb27255e6980ac9b3fd43d7654 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 6 Jun 2023 18:44:00 +0800 Subject: [PATCH 09/35] =?UTF-8?q?=E5=B0=86Latex=E5=88=86=E8=A7=A3=E7=A8=8B?= =?UTF-8?q?=E5=BA=8F=E6=94=BE=E5=88=B0=E5=AD=90=E8=BF=9B=E7=A8=8B=E6=89=A7?= =?UTF-8?q?=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 15 ++++- crazy_functions/crazy_functions_test.py | 8 +-- crazy_functions/latex_utils.py | 84 +++++++++++++++++-------- docs/Dockerfile+NoLocal+Latex | 21 ++----- 4 files changed, 80 insertions(+), 48 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index ecba82b..855cc1c 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -82,7 +82,14 @@ def arxiv_download(chatbot, history, txt): promote_file_to_downloadzone(target_file) return target_file return False - + def is_float(s): + try: + float(s) + return True + except ValueError: + return False + if ('.' 
in txt) and ('/' not in txt) and is_float(txt): + txt = 'https://arxiv.org/abs/' + txt if not txt.startswith('https://arxiv.org'): return txt, None @@ -198,7 +205,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- information about this plugin -------------> chatbot.append([ "函数插件功能?", - "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @@ -221,6 +228,8 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return + + if os.path.exists(txt): project_folder = txt else: @@ -228,6 +237,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] if len(file_manifest) == 0: report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") @@ -261,5 +271,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + # <-------------- we are done -------------> return success diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index d19d653..e743878 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -182,13 +182,13 @@ def test_Langchain知识库读取(): def test_Latex(): from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF - txt = r"https://arxiv.org/abs/1706.03762" + # txt = r"https://arxiv.org/abs/1706.03762" # txt = r"https://arxiv.org/abs/1902.03185" # txt = r"https://arxiv.org/abs/2305.18290" # txt = r"https://arxiv.org/abs/2305.17608" - # txt = r"https://arxiv.org/abs/2211.16068" # ACE - # txt = r"C:\Users\fuqingxu\arxiv_cache\2211.16068\workfolder" # ACE - + # txt = r"https://arxiv.org/abs/2211.16068" # ACE + # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE + txt = r"https://arxiv.org/abs/2002.09253" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index b490b5c..15dfebc 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -61,8 +61,8 @@ class LinkedListNode(): self.string = string self.preserve = preserve self.next = None - self.begin_line = 0 - self.begin_char = 0 + # self.begin_line = 0 + # self.begin_char = 0 def convert_to_linklist(text, mask): root = LinkedListNode("", preserve=True) @@ -97,11 +97,22 @@ def 寻找Latex主文件(file_manifest, mode): else: continue raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') - +def rm_comments(main_file): + new_file_remove_comment_lines = [] + for l in main_file.splitlines(): + # 删除整行的空注释 + if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + pass + else: + 
new_file_remove_comment_lines.append(l) + main_file = '\n'.join(new_file_remove_comment_lines) + main_file = re.sub(r'(? None: - """ - root是链表的根节点 - """ - self.root = None + self.nodes = None self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" @@ -212,16 +212,13 @@ class LatexPaperSplit(): Merge the result after the GPT process completed """ result_string = "" - node = self.root p = 0 - while True: + for node in self.nodes: if node.preserve: result_string += node.string else: result_string += fix_content(arr[p], node.string) p += 1 - node = node.next - if node is None: break if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) @@ -229,7 +226,27 @@ class LatexPaperSplit(): result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string - def split(self, txt, project_folder): + def split(self, txt, project_folder): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. + P.S. use multiprocessing to avoid timeout error + """ + import multiprocessing + manager = multiprocessing.Manager() + return_dict = manager.dict() + p = multiprocessing.Process( + target=lambda lps, txt, project_folder, return_dict: + lps.split_subprocess(txt, project_folder, return_dict), + args=(self, txt, project_folder, return_dict)) + p.start() + p.join() + self.nodes = return_dict['nodes'] + self.sp = return_dict['segment_parts_for_gpt'] + return self.sp + + def split_subprocess(self, txt, project_folder, return_dict): """ break down latex file to a linked list, each node use a preserve flag to indicate whether it should @@ -318,12 +335,20 @@ class LatexPaperSplit(): node = node.next if node is None: break + # 屏蔽空行和太短的句子 node = root while True: if len(node.string.strip('\n').strip(''))==0: node.preserve = True if len(node.string.strip('\n').strip(''))<42: node.preserve = True node = node.next if node is None: break + node = root + while True: + if node.next and node.preserve and node.next.preserve: + node.string += node.next.string + node.next = node.next.next + node = node.next + if node is None: break # 将前后断行符脱离 node = root @@ -345,8 +370,10 @@ class LatexPaperSplit(): with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: segment_parts_for_gpt = [] + nodes = [] node = root while True: + nodes.append(node) show_html = node.string.replace('\n','
<br/>')
                 if not node.preserve:
                     segment_parts_for_gpt.append(node.string)
                     f.write(f'<p style="color:red;">#{show_html}#</p>
') node = node.next if node is None: break - self.root = root - self.sp = segment_parts_for_gpt - return self.sp + + for n in nodes: n.next = None # break + return_dict['nodes'] = nodes + return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt + return return_dict class LatexPaperFileGroup(): """ @@ -439,7 +468,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin # <-------- 精细切分latex文件 ----------> lps = LatexPaperSplit() - res = lps.split(merged_content, project_folder) + res = lps.split(merged_content, project_folder) # 消耗时间的函数 # <-------- 拆分过长的latex片段 ----------> pfg = LatexPaperFileGroup() @@ -515,7 +544,8 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work f.writelines(file_lines) return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines except: - return False, 0, [0] + print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") + return False, -1, [-1] def compile_latex_with_timeout(command, timeout=60): diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex index 428dbc0..0f9ac8a 100644 --- a/docs/Dockerfile+NoLocal+Latex +++ b/docs/Dockerfile+NoLocal+Latex @@ -8,26 +8,17 @@ FROM fuqingxu/python311_texlive_ctex:latest # 指定路径 WORKDIR /gpt +ARG useProxyNetwork='' + +RUN $useProxyNetwork pip3 install gradio openai numpy arxiv rich -i https://pypi.douban.com/simple/ +RUN $useProxyNetwork pip3 install colorama Markdown pygments pymupdf -i https://pypi.douban.com/simple/ + # 装载项目文件 COPY . . -ARG useProxyNetwork='' - - -# # # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除 -# RUN apt-get update -# RUN apt-get install -y curl proxychains -# RUN $useProxyNetwork curl cip.cc -# RUN sed -i '$ d' /etc/proxychains.conf -# RUN sed -i '$ d' /etc/proxychains.conf -# RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf -# ARG useProxyNetwork=proxychains -# # # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除 - - # 安装依赖 -RUN $useProxyNetwork pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple +RUN $useProxyNetwork pip3 install -r requirements.txt -i https://pypi.douban.com/simple/ # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From 8ef734410160f2b8090a2ec10b15069ee60da9b7 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Tue, 6 Jun 2023 18:57:52 +0800 Subject: [PATCH 10/35] fix subprocess bug in Windows --- crazy_functions/latex_utils.py | 288 +++++++++++++++++---------------- 1 file changed, 145 insertions(+), 143 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 15dfebc..d3d7b9c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -192,6 +192,149 @@ def fix_content(final_tex, node_string): final_tex = node_string # 出问题了,还原原文 return final_tex +def split_subprocess(txt, project_folder, return_dict): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. 
+ """ + text = txt + mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM + + # 吸收title与作者以上的部分 + text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) + # 删除iffalse注释 + text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) + # 吸收在25行以内的begin-end组合 + text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) + # 吸收匿名公式 + text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) + # 吸收其他杂项 + text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\item ") + text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") + # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) + root = convert_to_linklist(text, mask) + + # 修复括号 + node = root + while True: + string = node.string + if node.preserve: + node = node.next + if node is None: break + continue + def break_check(string): + str_stack = [""] # (lv, index) + for i, c in enumerate(string): + if c == '{': + str_stack.append('{') + elif c == '}': + if len(str_stack) == 1: + print('stack fix') + return i + str_stack.pop(-1) + else: + str_stack[-1] += c + return -1 + bp = break_check(string) + + if bp == -1: + pass + elif bp == 0: + node.string = string[:1] + q 
= LinkedListNode(string[1:], False) + q.next = node.next + node.next = q + else: + node.string = string[:bp] + q = LinkedListNode(string[bp:], False) + q.next = node.next + node.next = q + + node = node.next + if node is None: break + + # 屏蔽空行和太短的句子 + node = root + while True: + if len(node.string.strip('\n').strip(''))==0: node.preserve = True + if len(node.string.strip('\n').strip(''))<42: node.preserve = True + node = node.next + if node is None: break + node = root + while True: + if node.next and node.preserve and node.next.preserve: + node.string += node.next.string + node.next = node.next.next + node = node.next + if node is None: break + + # 将前后断行符脱离 + node = root + prev_node = None + while True: + if not node.preserve: + lstriped_ = node.string.lstrip().lstrip('\n') + if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): + prev_node.string += node.string[:-len(lstriped_)] + node.string = lstriped_ + rstriped_ = node.string.rstrip().rstrip('\n') + if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): + node.next.string = node.string[len(rstriped_):] + node.next.string + node.string = rstriped_ + # ===== + prev_node = node + node = node.next + if node is None: break + + with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: + segment_parts_for_gpt = [] + nodes = [] + node = root + while True: + nodes.append(node) + show_html = node.string.replace('\n','
<br/>')
+            if not node.preserve:
+                segment_parts_for_gpt.append(node.string)
+                f.write(f'<p style="color:red;">#{show_html}#</p>')
+            else:
+                f.write(f'<p style="color:gray;">{show_html}</p>
') + node = node.next + if node is None: break + + for n in nodes: n.next = None # break + return_dict['nodes'] = nodes + return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt + return return_dict + + class LatexPaperSplit(): """ @@ -237,156 +380,15 @@ class LatexPaperSplit(): manager = multiprocessing.Manager() return_dict = manager.dict() p = multiprocessing.Process( - target=lambda lps, txt, project_folder, return_dict: - lps.split_subprocess(txt, project_folder, return_dict), - args=(self, txt, project_folder, return_dict)) + target=split_subprocess, + args=(txt, project_folder, return_dict)) p.start() p.join() self.nodes = return_dict['nodes'] self.sp = return_dict['segment_parts_for_gpt'] return self.sp - def split_subprocess(self, txt, project_folder, return_dict): - """ - break down latex file to a linked list, - each node use a preserve flag to indicate whether it should - be proccessed by GPT. - """ - text = txt - mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM - # 吸收title与作者以上的部分 - text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) - # 删除iffalse注释 - text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) - # 吸收在25行以内的begin-end组合 - text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) - # 吸收匿名公式 - text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) - # 吸收其他杂项 - text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\item ") - text, mask = 
split_worker(text, mask, r"\\label\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") - # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) - root = convert_to_linklist(text, mask) - - # 修复括号 - node = root - while True: - string = node.string - if node.preserve: - node = node.next - if node is None: break - continue - def break_check(string): - str_stack = [""] # (lv, index) - for i, c in enumerate(string): - if c == '{': - str_stack.append('{') - elif c == '}': - if len(str_stack) == 1: - print('stack fix') - return i - str_stack.pop(-1) - else: - str_stack[-1] += c - return -1 - bp = break_check(string) - - if bp == -1: - pass - elif bp == 0: - node.string = string[:1] - q = LinkedListNode(string[1:], False) - q.next = node.next - node.next = q - else: - node.string = string[:bp] - q = LinkedListNode(string[bp:], False) - q.next = node.next - node.next = q - - node = node.next - if node is None: break - - # 屏蔽空行和太短的句子 - node = root - while True: - if len(node.string.strip('\n').strip(''))==0: node.preserve = True - if len(node.string.strip('\n').strip(''))<42: node.preserve = True - node = node.next - if node is None: break - node = root - while True: - if node.next and node.preserve and node.next.preserve: - node.string += node.next.string - node.next = node.next.next - node = node.next - if node is None: break - - # 将前后断行符脱离 - node = root - prev_node = None - while True: - if not node.preserve: - lstriped_ = node.string.lstrip().lstrip('\n') - if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): - prev_node.string += node.string[:-len(lstriped_)] - node.string = lstriped_ - rstriped_ = node.string.rstrip().rstrip('\n') - if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): - node.next.string = node.string[len(rstriped_):] + node.next.string - node.string = rstriped_ - # ===== - prev_node = node - node = node.next - if node is None: break - - with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: - segment_parts_for_gpt = [] - nodes = [] - node = root - while True: - nodes.append(node) - show_html = node.string.replace('\n','
<br/>')
-                if not node.preserve:
-                    segment_parts_for_gpt.append(node.string)
-                    f.write(f'<p style="color:red;">#{show_html}#</p>')
-                else:
-                    f.write(f'<p style="color:gray;">{show_html}</p>
')
-        node = node.next
-        if node is None: break
-
-    for n in nodes: n.next = None # break
-    return_dict['nodes'] = nodes
-    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
-    return return_dict
 
 class LatexPaperFileGroup():
     """

From 9aafb2ee479f067ac4b6a955a7e43a1d0c553f50 Mon Sep 17 00:00:00 2001
From: MengDanzz <95761983+MengDanzz@users.noreply.github.com>
Date: Wed, 7 Jun 2023 09:18:57 +0800
Subject: [PATCH 11/35] =?UTF-8?q?=E9=9D=9Epypi=E5=8C=85=E5=8A=A0=E5=85=A5C?=
 =?UTF-8?q?OPY?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index aa4eee8..77f4188 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,7 @@ WORKDIR /gpt
 
 # 安装依赖
 COPY requirements.txt ./
+COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
 RUN pip3 install -r requirements.txt
 # 装载项目文件
 COPY . .

From dae65fd2c293cb4c4c8370ce962d5038a24378ce Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Wed, 7 Jun 2023 10:43:45 +0800
Subject: [PATCH 12/35] =?UTF-8?q?=E5=9C=A8copy=20..=E5=90=8E=E5=9C=A8?=
 =?UTF-8?q?=E8=BF=90=E8=A1=8C=E4=B8=80=E6=AC=A1pip=20install=E6=A3=80?=
 =?UTF-8?q?=E6=9F=A5=E4=BE=9D=E8=B5=96=E5=8F=98=E5=8C=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Dockerfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Dockerfile b/Dockerfile
index 77f4188..97ad13d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -19,6 +19,7 @@ COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl
 RUN pip3 install -r requirements.txt
 # 装载项目文件
 COPY . .
+RUN pip3 install -r requirements.txt
 
 # 可选步骤,用于预热模块
 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

From 149db621ec812fd2341fe3060b80ee210a81e528 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Wed, 7 Jun 2023 11:09:12 +0800
Subject: [PATCH 13/35] langchain check depends

---
 crazy_functions/Langchain知识库.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py
index 36999d5..5b09d3b 100644
--- a/crazy_functions/Langchain知识库.py
+++ b/crazy_functions/Langchain知识库.py
@@ -75,9 +75,18 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
 
 @CatchException
 def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
+    # resolve deps
+    try:
+        from zh_langchain import construct_vector_store
+        from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+        from .crazy_utils import knowledge_archive_interface
+    except Exception as e:
+        chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."])
+        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+        from .crazy_utils import try_install_deps
+        try_install_deps(['zh_langchain==0.2.0'])
 
     # < ------------------- --------------- >
-    from .crazy_utils import knowledge_archive_interface
     kai = knowledge_archive_interface()
 
     if 'langchain_plugin_embedding' in chatbot._cookies:

From 77cc141a8227ee78a936b57de970cd74b89495e3 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Thu, 8 Jun 2023 12:14:02 +0800
Subject: [PATCH 14/35] Update README.md

---
 README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02f047d..c671477 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF
 [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时
+Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题
@@ -285,11 +286,18 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
 
 <div align="center">
+
+ +
+10. Latex/Arxiv论文翻译功能 +
+ +
## 版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) -- version 3.4(Todo): 完善chatglm本地大模型的多线支持 +- version 3.4: +arxiv论文翻译、latex论文批改功能 - version 3.3: +互联网信息综合功能 - version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合) - version 3.1: 支持同时问询多个gpt模型!支持api2d,支持多个apikey负载均衡 From e2de1d76ea9c2747b6ed0c5a90abc8863893bf20 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:18:31 +0800 Subject: [PATCH 15/35] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c671477..d72a29f 100644 --- a/README.md +++ b/README.md @@ -292,7 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- + ===> +
## 版本: From 10b3001dba7cde9ac6b8934eac287380f5ba16cf Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:19:11 +0800 Subject: [PATCH 16/35] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d72a29f..8ada026 100644 --- a/README.md +++ b/README.md @@ -292,8 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- ===> - + ===> +
## 版本: From ce6f11d2003864edc0fb22051403791c0cbba5b3 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:20:49 +0800 Subject: [PATCH 17/35] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8ada026..40125ac 100644 --- a/README.md +++ b/README.md @@ -292,8 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- ===> - + ===> +
## 版本: From a0ea5d0e9e7abd951e92162f457722e816848b62 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:22:03 +0800 Subject: [PATCH 18/35] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 40125ac..289cf06 100644 --- a/README.md +++ b/README.md @@ -284,15 +284,13 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex全文校对纠错
- -
-
- + ===> +
10. Latex/Arxiv论文翻译功能
- ===> +
From f9226d92be881faa2bea7e42f75c467b6ea2f7dd Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:24:14 +0800 Subject: [PATCH 19/35] Update version --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index ad75b2c..669c708 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.37, + "version": 3.4, "show_feature": true, - "new_feature": "修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持 <-> 提供复旦MOSS模型适配(启用需额外依赖) <-> 提供docker-compose方案兼容LLAMA盘古RWKV等模型的后端 <-> 新增Live2D装饰 <-> 完善对话历史的保存/载入/删除 <-> 保存对话功能" + "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" } From ff5403eac6e615c74a991dfecd93f9a5a12036d4 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:42:24 +0800 Subject: [PATCH 20/35] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 289cf06..d121116 100644 --- a/README.md +++ b/README.md @@ -284,8 +284,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex全文校对纠错
- ===> - + ===> +
10. Latex/Arxiv论文翻译功能 From f30c9c6d3bf34f63b82e59c031220124c0e1c35d Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:43:13 +0800 Subject: [PATCH 21/35] Update README.md --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d121116..d4526c9 100644 --- a/README.md +++ b/README.md @@ -233,27 +233,31 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h +2. Latex/Arxiv论文翻译功能 +
+ + +
- -2. 生成报告。大部分插件都会在执行结束后,生成工作报告 +3. 生成报告。大部分插件都会在执行结束后,生成工作报告
-3. 模块化功能设计,简单的接口却能支持强大的功能 +4. 模块化功能设计,简单的接口却能支持强大的功能
-4. 这是一个能够“自我译解”的开源项目 +5. 这是一个能够“自我译解”的开源项目
-5. 译解其他开源项目,不在话下 +6. 译解其他开源项目,不在话下
@@ -262,37 +266,33 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) +7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
-7. 新增MOSS大语言模型支持 +8. 新增MOSS大语言模型支持
-8. OpenAI图像生成 +9. OpenAI图像生成
-9. OpenAI音频解析与总结 +10. OpenAI音频解析与总结
-10. Latex全文校对纠错 +11. Latex全文校对纠错
===>
-10. Latex/Arxiv论文翻译功能 -
- - -
+ ## 版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) From b52695845e181399fb0b5607b26125a8070dd1e6 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:44:05 +0800 Subject: [PATCH 22/35] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4526c9..2045942 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 2. Latex/Arxiv论文翻译功能
- + ===>
From 110510997f9018d206c05384ccb7526ae9b96db2 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:48:52 +0800 Subject: [PATCH 23/35] Update README.md --- README.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2045942..d4d6858 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时 -Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具 +⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 @@ -233,7 +233,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -2. Latex/Arxiv论文翻译功能 +2. ⭐Latex/Arxiv论文翻译功能⭐
===> @@ -241,9 +241,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 3. 生成报告。大部分插件都会在执行结束后,生成工作报告
- - - + +
4. 模块化功能设计,简单的接口却能支持强大的功能 @@ -259,11 +258,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 6. 译解其他开源项目,不在话下
- -
- -
- + +
7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) From e48d92e82e9634b5194947567bf7512a346d3343 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 8 Jun 2023 18:34:06 +0800 Subject: [PATCH 24/35] update translation --- docs/translate_english.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/translate_english.json b/docs/translate_english.json index d9968c6..57e008b 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -58,6 +58,8 @@ "连接网络回答问题": "ConnectToNetworkToAnswerQuestions", "联网的ChatGPT": "ChatGPTConnectedToNetwork", "解析任意code项目": "ParseAnyCodeProject", + "读取知识库作答": "ReadKnowledgeArchiveAnswerQuestions", + "知识库问答": "UpdateKnowledgeArchive", "同时问询_指定模型": "InquireSimultaneously_SpecifiedModel", "图片生成": "ImageGeneration", "test_解析ipynb文件": "Test_ParseIpynbFile", From ef1bfdd60f6b7c23bb23406cd8e0603f51f81165 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 8 Jun 2023 21:29:10 +0800 Subject: [PATCH 25/35] update pip install notice --- crazy_functions/数学动画生成manim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/数学动画生成manim.py b/crazy_functions/数学动画生成manim.py index 5851b9c..26e61b1 100644 --- a/crazy_functions/数学动画生成manim.py +++ b/crazy_functions/数学动画生成manim.py @@ -8,7 +8,7 @@ def inspect_dependency(chatbot, history): import manim return True except: - chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manimgl```"]) + chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manim manimgl```"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return False From 3c00e7a143f4c619166d4821d9804ef8aa0c5848 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sat, 10 Jun 2023 21:45:38 +0800 Subject: [PATCH 26/35] file link in chatbot --- toolbox.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/toolbox.py b/toolbox.py index 18915d0..4b0e1dd 100644 --- a/toolbox.py +++ b/toolbox.py @@ -483,7 +483,9 @@ def on_report_generated(files, chatbot): if len(report_files) == 0: return None, chatbot # files.extend(report_files) - chatbot.append(['报告如何远程获取?', '报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。']) + file_links = '' + for f in report_files: file_links += f'
{f}' + chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) return report_files, chatbot def is_openai_api_key(key): From aeddf6b461d58eb7c755b1ed1d8ce2810cdf752f Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 11 Jun 2023 10:20:49 +0800 Subject: [PATCH 27/35] =?UTF-8?q?Update=20Latex=E8=BE=93=E5=87=BAPDF?= =?UTF-8?q?=E7=BB=93=E6=9E=9C.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 855cc1c..6592c9a 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -146,7 +146,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", - f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return @@ -216,7 +216,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", - f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return From 3ecf2977a86abaf49ddaf112a196bc7f8fcb6717 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 11 Jun 2023 18:23:54 +0800 Subject: [PATCH 28/35] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcaption=E7=BF=BB?= =?UTF-8?q?=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 59 +++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index d3d7b9c..afaae22 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -29,7 +29,15 @@ def split_worker_reverse_caption(text, mask, pattern, flags=0): """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): - mask[res.regs[1][0]:res.regs[1][1]] = TRANSFORM + brace_level = 0 + p = begin = end = res.regs[1][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p + mask[begin:end] = TRANSFORM return text, mask def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): @@ -97,6 +105,7 @@ def 寻找Latex主文件(file_manifest, mode): else: continue raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): @@ -108,6 +117,7 @@ def rm_comments(main_file): main_file = '\n'.join(new_file_remove_comment_lines) main_file = re.sub(r'(? 0 and node_string.count('\_') > final_tex.count('\_'): # walk and replace any _ without \ final_tex = re.sub(r"(? 
     lps = LatexPaperSplit()
-    res = lps.split(merged_content, project_folder) # 消耗时间的函数
+    res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
 
     # <-------- 拆分过长的latex片段 ---------->
     pfg = LatexPaperFileGroup()

From 790a1cf12a2a98811ccb4c38568f21b120049f7a Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Sun, 11 Jun 2023 20:12:25 +0800
Subject: [PATCH 29/35] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=B8=80=E4=BA=9B?=
 =?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/latex_utils.py | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index afaae22..89ca7a5 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -23,9 +23,26 @@ def split_worker(text, mask, pattern, flags=0):
         mask[res.span()[0]:res.span()[1]] = PRESERVE
     return text, mask
 
-def split_worker_reverse_caption(text, mask, pattern, flags=0):
+def split_worker_careful_brace(text, mask, pattern, flags=0):
     """
-    Move caption area out of preserve area
+    Move area into preserve area
+    """
+    pattern_compile = re.compile(pattern, flags)
+    for res in pattern_compile.finditer(text):
+        brace_level = -1
+        p = begin = end = res.regs[0][0]
+        for _ in range(1024*16):
+            if text[p] == '}' and brace_level == 0: break
+            elif text[p] == '}': brace_level -= 1
+            elif text[p] == '{': brace_level += 1
+            p += 1
+        end = p+1
+        mask[begin:end] = PRESERVE
+    return text, mask
+
+def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
+    """
+    Move area out of preserve area
     """
     pattern_compile = re.compile(pattern, flags)
     for res in pattern_compile.finditer(text):
@@ -274,7 +291,8 @@ def split_subprocess(txt, project_folder, return_dict, opts):
     text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
     text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-    text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
+    text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
+    text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
     root = convert_to_linklist(text, mask)
 
     # 修复括号
@@ -504,6 +522,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
         f.write(merged_content)
 
     # <-------- 精细切分latex文件 ---------->
+    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
+    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
     lps = LatexPaperSplit()
     res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数
 
@@ -602,7 +622,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
     current_dir = os.getcwd()
     n_fix = 1
     max_try = 32
-    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
+    chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
     chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
     yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面

From 9fd212652ed0e80d3e55b9b72461fc24d3837ce1 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Mon, 12 Jun
09:45:59 +0800 Subject: [PATCH 30/35] =?UTF-8?q?=E4=B8=93=E4=B8=9A=E8=AF=8D=E6=B1=87?= =?UTF-8?q?=E5=A3=B0=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 6592c9a..2e9a30b 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -5,7 +5,7 @@ pj = os.path.join ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") # =================================== 工具函数 =============================================== -沙雕GPT啊别犯这些低级翻译错误 = 'You must to translate "agent" to "智能体". ' +专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' def switch_prompt(pfg, mode): """ Generate prompts and system prompts based on the mode for proofreading or translating. @@ -25,7 +25,7 @@ def switch_prompt(pfg, mode): f"\n\n{frag}" for frag in pfg.sp_file_contents] sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] elif mode == 'translate_zh': - inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese." + 沙雕GPT啊别犯这些低级翻译错误 + + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + r"Answer me only with the translated text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] From 9ac3d0d65dfd1ae4209a28139d133057a3c22e39 Mon Sep 17 00:00:00 2001 From: OverKit <78402478+OverKit@users.noreply.github.com> Date: Mon, 12 Jun 2023 10:09:52 +0800 Subject: [PATCH 31/35] check letter % after removing spaces or tabs in the left --- crazy_functions/latex_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 89ca7a5..53894ca 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -127,7 +127,7 @@ def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): # 删除整行的空注释 - if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + if l.lstrip().startswith("%"): pass else: new_file_remove_comment_lines.append(l) From c365ea9f579acb88ffd756c3483c5c29fc2b57c3 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Tue, 13 Jun 2023 16:13:19 +0800 Subject: [PATCH 32/35] Update README.md --- README.md | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index d4d6858..581d3d5 100644 --- a/README.md +++ b/README.md @@ -228,7 +228,7 @@ docker-compose up 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 -Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存,点击 `删除所有本地对话历史记录` 可以删除所有html存档缓存。 +Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。
@@ -251,38 +251,33 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
-5. 这是一个能够“自我译解”的开源项目 -
- -
- -6. 译解其他开源项目,不在话下 +5. 译解其他开源项目
-7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) +6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
-8. 新增MOSS大语言模型支持 +7. 新增MOSS大语言模型支持
-9. OpenAI图像生成 +8. OpenAI图像生成
-10. OpenAI音频解析与总结 +9. OpenAI音频解析与总结
-11. Latex全文校对纠错 +10. Latex全文校对纠错
===> @@ -310,30 +305,32 @@ gpt_academic开发者QQ群-2:610599535 - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请务必使用requirement.txt安装Gradio + - 官方Gradio目前有很多兼容性Bug,请务必使用`requirement.txt`安装Gradio ## 参考与学习 ``` -代码中参考了很多其他优秀项目中的设计,主要包括: +代码中参考了很多其他优秀项目中的设计,顺序不分先后: -# 项目1:清华ChatGLM-6B: +# 清华ChatGLM-6B: https://github.com/THUDM/ChatGLM-6B -# 项目2:清华JittorLLMs: +# 清华JittorLLMs: https://github.com/Jittor/JittorLLMs -# 项目3:Edge-GPT: -https://github.com/acheong08/EdgeGPT - -# 项目4:ChuanhuChatGPT: -https://github.com/GaiZhenbiao/ChuanhuChatGPT - -# 项目5:ChatPaper: +# ChatPaper: https://github.com/kaixindelele/ChatPaper -# 更多: +# Edge-GPT: +https://github.com/acheong08/EdgeGPT + +# ChuanhuChatGPT: +https://github.com/GaiZhenbiao/ChuanhuChatGPT + +# Oobabooga one-click installer: +https://github.com/oobabooga/one-click-installers + +# More: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -https://github.com/oobabooga/one-click-installers ``` From c40ebfc21f39b995b3f0437b387d5ee561759327 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Wed, 14 Jun 2023 09:50:15 +0800 Subject: [PATCH 33/35] =?UTF-8?q?=E5=B0=86gpt-3.5-16k=E4=BD=9C=E4=B8=BA?= =?UTF-8?q?=E5=8A=A0=E5=85=A5=E6=94=AF=E6=8C=81=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- request_llm/bridge_all.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 14b089e..87e0ec9 100644 --- a/config.py +++ b/config.py @@ -46,7 +46,7 @@ MAX_RETRY = 2 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] # P.S. 
其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # 本地LLM模型如ChatGLM的执行方式 CPU/GPU diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index b6efe21..a27407c 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -83,6 +83,15 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + + "gpt-3.5-turbo-16k": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024*16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, "gpt-4": { "fn_with_ui": chatgpt_ui, From 8c62f21aa6b0c68bdc795f315f5d325b1384161b Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Wed, 14 Jun 2023 09:57:09 +0800 Subject: [PATCH 34/35] =?UTF-8?q?3.41=E5=A2=9E=E5=8A=A0gpt-3.5-16k?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index 669c708..ceb909a 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.4, + "version": 3.41, "show_feature": true, - "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" + "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" } From 73d4a1ff4b41548b6d6b5ea4c321fa2e81fe55ce Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Wed, 14 Jun 2023 10:15:47 +0800 Subject: [PATCH 35/35] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 581d3d5..39b37ea 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持,[API2D](https://api2d.com/)接口支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? +[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 ……
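
A closing note on the dependency-guard pattern in PATCH 13: the plugin imports its optional dependencies inside the handler and only falls back to installation when the import fails, so a missing package never blocks startup. Below is a minimal standalone sketch of that idea — `require` and its demo argument are illustrative names for this sketch only, not functions from the project.

```python
# Sketch of the lazy-dependency pattern from PATCH 13 (illustrative helper,
# not project code): try the import first, run pip only when it is missing.
import importlib
import subprocess
import sys

def require(module, pip_spec=None):
    try:
        return importlib.import_module(module)
    except ImportError:
        # analogous to try_install_deps(['zh_langchain==0.2.0']) in the patch
        subprocess.check_call([sys.executable, "-m", "pip", "install",
                               pip_spec or module])
        return importlib.import_module(module)

if __name__ == '__main__':
    json_mod = require("json")  # stdlib stand-in so the demo always succeeds
    print(json_mod.dumps({"ok": True}))
```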
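
PATCH 28 and PATCH 29 replace plain non-greedy regex matching with a brace-counting walk, because a pattern like r"\\caption\{(.*?)\}" stops at the first '}' and truncates captions containing nested commands. The sketch below isolates that walk under one simplifying assumption: the mask is a plain Python list rather than the numpy array used in latex_utils.py.

```python
import re

PRESERVE, TRANSFORM = 0, 1  # same flag values as in latex_utils.py

def mark_balanced(text, mask, pattern, value, flags=0):
    # Walk forward from the start of capture group 1, counting braces, so a
    # nested group like \caption{a \textbf{b} c} is covered in full -- the
    # non-greedy regex alone would have stopped at the first '}'.
    for res in re.compile(pattern, flags).finditer(text):
        p = begin = res.start(1)
        brace_level = 0
        while p < len(text):
            if text[p] == '}' and brace_level == 0: break
            elif text[p] == '}': brace_level -= 1
            elif text[p] == '{': brace_level += 1
            p += 1
        mask[begin:p] = [value] * (p - begin)
    return mask

if __name__ == '__main__':
    tex = r"before \caption{outer \textbf{inner} tail} after"
    mask = mark_balanced(tex, [PRESERVE] * len(tex),
                         r"\\caption\{(.*?)\}", TRANSFORM, re.DOTALL)
    print(''.join(c for c, m in zip(tex, mask) if m == TRANSFORM))
    # -> outer \textbf{inner} tail
```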
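
PATCH 31 simplifies the comment-line test in rm_comments: a LaTeX line is a pure comment exactly when its first non-blank character is '%', and l.lstrip().startswith("%") also covers tab-indented comments that the old two-case check missed. A minimal reproduction of the rule (the function name here is illustrative):

```python
def drop_comment_lines(tex):
    # keep a line unless its first non-blank character is '%';
    # lstrip() handles both space- and tab-indented comment lines
    return '\n'.join(l for l in tex.splitlines()
                     if not l.lstrip().startswith("%"))

if __name__ == '__main__':
    sample = "\\section{A}\n  % space-indented note\n\t% tab-indented note\nbody"
    print(drop_comment_lines(sample))  # only \section{A} and body survive
```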
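
PATCH 33 adds gpt-3.5-turbo-16k by appending a single entry to the model_info registry in request_llm/bridge_all.py; callers then read capabilities such as max_token from the table instead of branching on model names. The sketch below reproduces only that shape — the word-count tokenizer and the fits_in_context helper are stand-ins for this sketch, not the project's tiktoken-based tokenizer or its real dispatch API.

```python
def count_tokens(txt):
    # stand-in for the project's get_token_num_gpt35; word count only
    return len(txt.split())

model_info = {
    "gpt-3.5-turbo":     {"max_token": 4096,      "token_cnt": count_tokens},
    "gpt-3.5-turbo-16k": {"max_token": 1024 * 16, "token_cnt": count_tokens},
}

def fits_in_context(model, prompt):
    # dispatch on registry data: adding a new model never touches this code
    info = model_info[model]
    return info["token_cnt"](prompt) <= info["max_token"]

if __name__ == '__main__':
    long_prompt = "word " * 8000
    print(fits_in_context("gpt-3.5-turbo", long_prompt))      # False
    print(fits_in_context("gpt-3.5-turbo-16k", long_prompt))  # True
```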