diff --git a/README.md b/README.md index 9a1e72d..02f047d 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ > **Note** > -> 5月27日对gradio依赖进行了较大的修复和调整,fork并解决了官方Gradio的一系列bug。但如果27日当天进行了更新,可能会导致代码报错(依赖缺失,卡在loading界面等),请及时更新到**最新版代码**并重新安装pip依赖即可。若给您带来困扰还请谅解。安装依赖时,请严格选择requirements.txt中**指定的版本**: +> 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**: > -> `pip install -r requirements.txt -i https://pypi.org/simple` +> `pip install -r requirements.txt` > # GPT 学术优化 (GPT Academic) @@ -175,21 +175,26 @@ docker-compose up ## 安装-方法3:其他部署姿势 +1. 一键运行脚本。 +完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本, +不建议电脑上已有python的用户采用此方法(在此基础上安装插件的依赖很麻烦)。 +脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。 -1. 如何使用反代URL/微软云AzureAPI +2. 使用docker-compose运行。 +请阅读docker-compose.yml后,按照其中的提示操作即可 + +3. 如何使用反代URL/微软云AzureAPI。 按照`config.py`中的说明配置API_URL_REDIRECT即可。 -2. 远程云服务器部署(需要云服务器知识与经验) +4. 远程云服务器部署(需要云服务器知识与经验)。 请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) -3. 使用WSL2(Windows Subsystem for Linux 子系统) +5. 使用WSL2(Windows Subsystem for Linux 子系统)。 请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) -4. 如何在二级网址(如`http://localhost/subpath`)下运行 +6. 如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) -5. 使用docker-compose运行 -请阅读docker-compose.yml后,按照其中的提示操作即可 --- # Advanced Usage ## 自定义新的便捷按钮 / 自定义函数插件 @@ -327,4 +332,5 @@ https://github.com/kaixindelele/ChatPaper # 更多: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo +https://github.com/oobabooga/one-click-installers ``` diff --git a/colorful.py b/colorful.py index d90972b..9749861 100644 --- a/colorful.py +++ b/colorful.py @@ -34,58 +34,28 @@ def print亮紫(*kw,**kargs): def print亮靛(*kw,**kargs): print("\033[1;36m",*kw,"\033[0m",**kargs) - - -def print亮红(*kw,**kargs): - print("\033[1;31m",*kw,"\033[0m",**kargs) -def print亮绿(*kw,**kargs): - print("\033[1;32m",*kw,"\033[0m",**kargs) -def print亮黄(*kw,**kargs): - print("\033[1;33m",*kw,"\033[0m",**kargs) -def print亮蓝(*kw,**kargs): - print("\033[1;34m",*kw,"\033[0m",**kargs) -def print亮紫(*kw,**kargs): - print("\033[1;35m",*kw,"\033[0m",**kargs) -def print亮靛(*kw,**kargs): - print("\033[1;36m",*kw,"\033[0m",**kargs) - -print_red = print红 -print_green = print绿 -print_yellow = print黄 -print_blue = print蓝 -print_purple = print紫 -print_indigo = print靛 - -print_bold_red = print亮红 -print_bold_green = print亮绿 -print_bold_yellow = print亮黄 -print_bold_blue = print亮蓝 -print_bold_purple = print亮紫 -print_bold_indigo = print亮靛 - -if not stdout.isatty(): - # redirection, avoid a fucked up log file - print红 = print - print绿 = print - print黄 = print - print蓝 = print - print紫 = print - print靛 = print - print亮红 = print - print亮绿 = print - print亮黄 = print - print亮蓝 = print - print亮紫 = print - print亮靛 = print - print_red = print - print_green = print - print_yellow = print - print_blue = print - print_purple = print - print_indigo = print - print_bold_red = print - print_bold_green = print - print_bold_yellow = print - print_bold_blue = print - print_bold_purple = print - print_bold_indigo = print \ No newline at end of file +# Do you like the elegance of Chinese characters? +def sprint红(*kw): + return "\033[0;31m"+' '.join(kw)+"\033[0m" +def sprint绿(*kw): + return "\033[0;32m"+' '.join(kw)+"\033[0m" +def sprint黄(*kw): + return "\033[0;33m"+' '.join(kw)+"\033[0m" +def sprint蓝(*kw): + return "\033[0;34m"+' '.join(kw)+"\033[0m" +def sprint紫(*kw): + return "\033[0;35m"+' '.join(kw)+"\033[0m" +def sprint靛(*kw): + return "\033[0;36m"+' '.join(kw)+"\033[0m" +def sprint亮红(*kw): + return "\033[1;31m"+' '.join(kw)+"\033[0m" +def sprint亮绿(*kw): + return "\033[1;32m"+' '.join(kw)+"\033[0m" +def sprint亮黄(*kw): + return "\033[1;33m"+' '.join(kw)+"\033[0m" +def sprint亮蓝(*kw): + return "\033[1;34m"+' '.join(kw)+"\033[0m" +def sprint亮紫(*kw): + return "\033[1;35m"+' '.join(kw)+"\033[0m" +def sprint亮靛(*kw): + return "\033[1;36m"+' '.join(kw)+"\033[0m" diff --git a/crazy_functional.py b/crazy_functional.py index f05c91a..7812e53 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -138,7 +138,7 @@ def get_crazy_functions(): ###################### 第二组插件 ########################### # [第二组插件]: 经过充分测试 from crazy_functions.批量总结PDF文档 import 批量总结PDF文档 - from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer + # from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档 from crazy_functions.谷歌检索小助手 import 谷歌检索小助手 from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入 @@ -164,17 +164,16 @@ def get_crazy_functions(): # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Function": HotReload(批量总结PDF文档) }, - "[测试功能] 批量总结PDF文档pdfminer": { - "Color": "stop", - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(批量总结PDF文档pdfminer) - }, + # "[测试功能] 批量总结PDF文档pdfminer": { + # "Color": "stop", + # "AsButton": False, # 加入下拉菜单中 + # "Function": HotReload(批量总结PDF文档pdfminer) + # }, "谷歌学术检索助手(输入谷歌学术搜索页url)": { "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Function": HotReload(谷歌检索小助手) }, - "理解PDF文档内容 (模仿ChatPDF)": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Color": "stop", @@ -193,7 +192,7 @@ def get_crazy_functions(): "AsButton": False, # 加入下拉菜单中 "Function": HotReload(Latex英文纠错) }, - "[测试功能] 中文Latex项目全文润色(输入路径或上传压缩包)": { + "中文Latex项目全文润色(输入路径或上传压缩包)": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 "Color": "stop", "AsButton": False, # 加入下拉菜单中 @@ -222,15 +221,7 @@ def get_crazy_functions(): }) ###################### 第三组插件 ########################### - # [第三组插件]: 尚未充分测试的函数插件,放在这里 - from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要 - function_plugins.update({ - "一键下载arxiv论文并翻译摘要(先在input输入编号,如1812.10695)": { - "Color": "stop", - "AsButton": False, # 加入下拉菜单中 - "Function": HotReload(下载arxiv论文并翻译摘要) - } - }) + # [第三组插件]: 尚未充分测试的函数插件 from crazy_functions.联网的ChatGPT import 连接网络回答问题 function_plugins.update({ @@ -307,5 +298,56 @@ def get_crazy_functions(): except: print('Load function plugin failed') + try: + from crazy_functions.Langchain知识库 import 知识库问答 + function_plugins.update({ + "[功能尚不稳定] 构建知识库(请先上传文件素材)": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": "待注入的知识库名称id, 默认为default", + "Function": HotReload(知识库问答) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.Langchain知识库 import 读取知识库作答 + function_plugins.update({ + "[功能尚不稳定] 知识库问答": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要首先调用构建知识库", + "Function": HotReload(读取知识库作答) + } + }) + except: + print('Load function plugin failed') + + try: + from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 + function_plugins.update({ + "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": { + "Color": "stop", + "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": "", + "Function": HotReload(Latex英文纠错加PDF对比) + } + }) + from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF + function_plugins.update({ + "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": { + "Color": "stop", + "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": "", + "Function": HotReload(Latex翻译中文并重新编译PDF) + } + }) + except: + print('Load function plugin failed') ###################### 第n组插件 ########################### return function_plugins diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py new file mode 100644 index 0000000..36999d5 --- /dev/null +++ b/crazy_functions/Langchain知识库.py @@ -0,0 +1,98 @@ +from toolbox import CatchException, update_ui, ProxyNetworkActivate +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything + + + +@CatchException +def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 + plugin_kwargs 插件模型的参数,暂时没有用武之地 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # resolve deps + try: + from zh_langchain import construct_vector_store + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from .crazy_utils import knowledge_archive_interface + except Exception as e: + chatbot.append( + ["依赖不足", + "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."] + ) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + from .crazy_utils import try_install_deps + try_install_deps(['zh_langchain==0.2.0']) + + # < --------------------读取参数--------------- > + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + kai_id = plugin_kwargs.get("advanced_arg", 'default') + + # < --------------------读取文件--------------- > + file_manifest = [] + spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"] + for sp in spl: + _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}') + file_manifest += file_manifest_tmp + + if len(file_manifest) == 0: + chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + # < -------------------预热文本向量化模组--------------- > + chatbot.append(['
'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + print('Checking Text2vec ...') + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + with ProxyNetworkActivate(): # 临时地激活代理网络 + HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") + + # < -------------------构建知识库--------------- > + chatbot.append(['
'.join(file_manifest), "正在构建知识库..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + print('Establishing knowledge archive ...') + with ProxyNetworkActivate(): # 临时地激活代理网络 + kai = knowledge_archive_interface() + kai.feed_archive(file_manifest=file_manifest, id=kai_id) + kai_files = kai.get_loaded_file() + kai_files = '
'.join(kai_files) + # chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"]) + # yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + # chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id() + # chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答' + # chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"]) + chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + +@CatchException +def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1): + + # < ------------------- --------------- > + from .crazy_utils import knowledge_archive_interface + kai = knowledge_archive_interface() + + if 'langchain_plugin_embedding' in chatbot._cookies: + resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding']) + else: + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + kai_id = plugin_kwargs.get("advanced_arg", 'default') + resp, prompt = kai.answer_with_archive_by_id(txt, kai_id) + + chatbot.append((txt, '[Local Message] ' + prompt)) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=prompt, inputs_show_user=txt, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt=system_prompt + ) + history.extend((prompt, gpt_say)) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 diff --git a/crazy_functions/Latex全文润色.py b/crazy_functions/Latex全文润色.py index 8d3f97b..9e1d4b6 100644 --- a/crazy_functions/Latex全文润色.py +++ b/crazy_functions/Latex全文润色.py @@ -238,3 +238,6 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread') + + + diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py new file mode 100644 index 0000000..daac763 --- /dev/null +++ b/crazy_functions/Latex输出PDF结果.py @@ -0,0 +1,263 @@ +from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone +from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str +import glob, os, requests, time +pj = os.path.join +ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") + +# =================================== 工具函数 =============================================== +沙雕GPT啊别犯这些低级翻译错误 = 'You must to translate "agent" to "智能体". ' +def switch_prompt(pfg, mode): + """ + Generate prompts and system prompts based on the mode for proofreading or translating. + Args: + - pfg: Proofreader or Translator instance. + - mode: A string specifying the mode, either 'proofread' or 'translate_zh'. + + Returns: + - inputs_array: A list of strings containing prompts for users to respond to. + - sys_prompt_array: A list of strings containing prompts for system prompts. + """ + n_split = len(pfg.sp_file_contents) + if mode == 'proofread': + inputs_array = [r"Below is a section from an academic paper, proofread this section." + + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + + r"Answer me only with the revised text:" + + f"\n\n{frag}" for frag in pfg.sp_file_contents] + sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] + elif mode == 'translate_zh': + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese." + 沙雕GPT啊别犯这些低级翻译错误 + + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + + r"Answer me only with the translated text:" + + f"\n\n{frag}" for frag in pfg.sp_file_contents] + sys_prompt_array = ["You are a professional translator." for _ in range(n_split)] + else: + assert False, "未知指令" + return inputs_array, sys_prompt_array + +def desend_to_extracted_folder_if_exist(project_folder): + """ + Descend into the extracted folder if it exists, otherwise return the original folder. + + Args: + - project_folder: A string specifying the folder path. + + Returns: + - A string specifying the path to the extracted folder, or the original folder if there is no extracted folder. + """ + maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)] + if len(maybe_dir) == 0: return project_folder + if maybe_dir[0].endswith('.extract'): return maybe_dir[0] + return project_folder + +def move_project(project_folder, arxiv_id=None): + """ + Create a new work folder and copy the project folder to it. + + Args: + - project_folder: A string specifying the folder path of the project. + + Returns: + - A string specifying the path to the new work folder. + """ + import shutil, time + time.sleep(2) # avoid time string conflict + if arxiv_id is not None: + new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder') + else: + new_workfolder = f'gpt_log/{gen_time_str()}' + try: shutil.rmtree(new_workfolder) + except: pass + shutil.copytree(src=project_folder, dst=new_workfolder) + return new_workfolder + +def arxiv_download(chatbot, history, txt): + def check_cached_translation_pdf(arxiv_id): + translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation') + if not os.path.exists(translation_dir): + os.makedirs(translation_dir) + target_file = pj(translation_dir, 'translate_zh.pdf') + if os.path.exists(target_file): + promote_file_to_downloadzone(target_file) + return target_file + return False + + if not txt.startswith('https://arxiv.org'): + return txt, None + + # <-------------- inspect format -------------> + chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...']) + yield from update_ui(chatbot=chatbot, history=history) + time.sleep(1) # 刷新界面 + + url_ = txt # https://arxiv.org/abs/1707.06690 + if not txt.startswith('https://arxiv.org/abs/'): + msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}" + yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面 + return msg, None + # <-------------- set format -------------> + arxiv_id = url_.split('/abs/')[-1] + cached_translation_pdf = check_cached_translation_pdf(arxiv_id) + if cached_translation_pdf: return cached_translation_pdf, arxiv_id + + url_tar = url_.replace('/abs/', '/e-print/') + translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print') + extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract') + os.makedirs(translation_dir, exist_ok=True) + + # <-------------- download arxiv source file -------------> + dst = pj(translation_dir, arxiv_id+'.tar') + if os.path.exists(dst): + yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面 + else: + yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面 + proxies, = get_conf('proxies') + r = requests.get(url_tar, proxies=proxies) + with open(dst, 'wb+') as f: + f.write(r.content) + # <-------------- extract file -------------> + yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面 + from toolbox import extract_archive + extract_archive(file_path=dst, dest_dir=extract_dst) + return extract_dst, arxiv_id +# ========================================= 插件主程序1 ===================================================== + + +@CatchException +def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # <-------------- information about this plugin -------------> + chatbot.append([ "函数插件功能?", + "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + + # <-------------- check deps -------------> + try: + import glob, os, time + os.system(f'pdflatex -version') + from .latex_utils import Latex精细分解与转化, 编译Latex差别 + except Exception as e: + chatbot.append([ f"解析项目: {txt}", + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- clear history and read input -------------> + history = [] + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + if len(file_manifest) == 0: + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- if is a zip/tar file -------------> + project_folder = desend_to_extracted_folder_if_exist(project_folder) + + + # <-------------- move latex project away from temp folder -------------> + project_folder = move_project(project_folder, arxiv_id=None) + + + # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> + if not os.path.exists(project_folder + '/merge_proofread.tex'): + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) + + + # <-------------- compile PDF -------------> + success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', + work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) + + + # <-------------- zip PDF -------------> + zip_result(project_folder) + if success: + chatbot.append((f"成功啦", '请查收结果(压缩包)...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + else: + chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + + # <-------------- we are done -------------> + return success + + +# ========================================= 插件主程序2 ===================================================== + +@CatchException +def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # <-------------- information about this plugin -------------> + chatbot.append([ + "函数插件功能?", + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + + # <-------------- check deps -------------> + try: + import glob, os, time + os.system(f'pdflatex -version') + from .latex_utils import Latex精细分解与转化, 编译Latex差别 + except Exception as e: + chatbot.append([ f"解析项目: {txt}", + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- clear history and read input -------------> + history = [] + txt, arxiv_id = yield from arxiv_download(chatbot, history, txt) + if txt.endswith('.pdf'): + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + if os.path.exists(txt): + project_folder = txt + else: + if txt == "": txt = '空空如也的输入栏' + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] + if len(file_manifest) == 0: + report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + return + + + # <-------------- if is a zip/tar file -------------> + project_folder = desend_to_extracted_folder_if_exist(project_folder) + + + # <-------------- move latex project away from temp folder -------------> + project_folder = move_project(project_folder, arxiv_id) + + + # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> + if not os.path.exists(project_folder + '/merge_translate_zh.tex'): + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt) + + + # <-------------- compile PDF -------------> + success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', + work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) + + # <-------------- zip PDF -------------> + zip_result(project_folder) + if success: + chatbot.append((f"成功啦", '请查收结果(压缩包)...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + else: + chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) + yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + + # <-------------- we are done -------------> + return success diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index a9bfbf8..d4e3274 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -3,6 +3,8 @@ 这个文件用于函数插件的单元测试 运行方法 python crazy_functions/crazy_functions_test.py """ + +# ============================================================================================================================== def validate_path(): import os, sys @@ -10,10 +12,16 @@ def validate_path(): root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') os.chdir(root_dir_assume) sys.path.append(root_dir_assume) - validate_path() # validate path so you can run from base directory + +# ============================================================================================================================== + from colorful import * from toolbox import get_conf, ChatBotWithCookies +import contextlib +import os +import sys +from functools import wraps proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') @@ -30,7 +38,43 @@ history = [] system_prompt = "Serve me as a writing and programming assistant." web_port = 1024 +# ============================================================================================================================== +def silence_stdout(func): + @wraps(func) + def wrapper(*args, **kwargs): + _original_stdout = sys.stdout + sys.stdout = open(os.devnull, 'w') + for q in func(*args, **kwargs): + sys.stdout = _original_stdout + yield q + sys.stdout = open(os.devnull, 'w') + sys.stdout.close() + sys.stdout = _original_stdout + return wrapper + +class CLI_Printer(): + def __init__(self) -> None: + self.pre_buf = "" + + def print(self, buf): + bufp = "" + for index, chat in enumerate(buf): + a, b = chat + bufp += sprint亮靛('[Me]:' + a) + '\n' + bufp += '[GPT]:' + b + if index < len(buf)-1: + bufp += '\n' + + if self.pre_buf!="" and bufp.startswith(self.pre_buf): + print(bufp[len(self.pre_buf):], end='') + else: + print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'+bufp, end='') + self.pre_buf = bufp + return + +cli_printer = CLI_Printer() +# ============================================================================================================================== def test_解析一个Python项目(): from crazy_functions.解析项目源代码 import 解析一个Python项目 txt = "crazy_functions/test_project/python/dqn" @@ -116,6 +160,52 @@ def test_Markdown多语言(): for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): print(cb) +def test_Langchain知识库(): + from crazy_functions.Langchain知识库 import 知识库问答 + txt = "./" + chatbot = ChatBotWithCookies(llm_kwargs) + for cookies, cb, hist, msg in silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + + chatbot = ChatBotWithCookies(cookies) + from crazy_functions.Langchain知识库 import 读取知识库作答 + txt = "What is the installation method?" + for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + +def test_Langchain知识库读取(): + from crazy_functions.Langchain知识库 import 读取知识库作答 + txt = "远程云服务器部署?" + for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + +def test_Latex(): + from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF + txt = "C:/Users/fuqingxu/Desktop/proofread" + txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/paperx" + txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery" + txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06" + txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20" + txt = r"https://arxiv.org/abs/1902.03185" + txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40" + txt = r"https://arxiv.org/abs/2305.18290" + txt = r"https://arxiv.org/abs/2305.17608" + # txt = r"https://arxiv.org/abs/2306.00324" + txt = r"https://arxiv.org/abs/2211.16068" + + for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + cli_printer.print(cb) # print(cb) + + + + # txt = "2302.02948.tar" + # print(txt) + # main_tex, work_folder = Latex预处理(txt) + # print('main tex:', main_tex) + # res = 编译Latex(main_tex, work_folder) + # # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + # cli_printer.print(cb) # print(cb) + # test_解析一个Python项目() @@ -129,7 +219,8 @@ def test_Markdown多语言(): # test_联网回答问题() # test_解析ipynb文件() # test_数学动画生成manim() -test_Markdown多语言() - +# test_Langchain知识库() +# test_Langchain知识库读取() +test_Latex() input("程序完成,回车退出。") print("退出。") \ No newline at end of file diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index de205d7..96301ff 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -1,4 +1,5 @@ from toolbox import update_ui, get_conf, trimmed_format_exc +import threading def input_clipping(inputs, history, max_token_limit): import numpy as np @@ -606,3 +607,94 @@ def get_files_from_everything(txt, type): # type='.md' success = False return success, file_manifest, project_folder + + + + +def Singleton(cls): + _instance = {} + + def _singleton(*args, **kargs): + if cls not in _instance: + _instance[cls] = cls(*args, **kargs) + return _instance[cls] + + return _singleton + + +@Singleton +class knowledge_archive_interface(): + def __init__(self) -> None: + self.threadLock = threading.Lock() + self.current_id = "" + self.kai_path = None + self.qa_handle = None + self.text2vec_large_chinese = None + + def get_chinese_text2vec(self): + if self.text2vec_large_chinese is None: + # < -------------------预热文本向量化模组--------------- > + from toolbox import ProxyNetworkActivate + print('Checking Text2vec ...') + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + with ProxyNetworkActivate(): # 临时地激活代理网络 + self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese") + + return self.text2vec_large_chinese + + + def feed_archive(self, file_manifest, id="default"): + self.threadLock.acquire() + # import uuid + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=file_manifest, + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + + def get_current_archive_id(self): + return self.current_id + + def get_loaded_file(self): + return self.qa_handle.get_loaded_file() + + def answer_with_archive_by_id(self, txt, id): + self.threadLock.acquire() + if not self.current_id == id: + self.current_id = id + from zh_langchain import construct_vector_store + self.qa_handle, self.kai_path = construct_vector_store( + vs_id=self.current_id, + files=[], + sentence_size=100, + history=[], + one_conent="", + one_content_segmentation="", + text2vec = self.get_chinese_text2vec(), + ) + VECTOR_SEARCH_SCORE_THRESHOLD = 0 + VECTOR_SEARCH_TOP_K = 4 + CHUNK_SIZE = 512 + resp, prompt = self.qa_handle.get_knowledge_based_conent_test( + query = txt, + vs_path = self.kai_path, + score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD, + vector_search_top_k=VECTOR_SEARCH_TOP_K, + chunk_conent=True, + chunk_size=CHUNK_SIZE, + text2vec = self.get_chinese_text2vec(), + ) + self.threadLock.release() + return resp, prompt + +def try_install_deps(deps): + for dep in deps: + import subprocess, sys + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep]) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py new file mode 100644 index 0000000..3e128eb --- /dev/null +++ b/crazy_functions/latex_utils.py @@ -0,0 +1,606 @@ +from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面 +from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone +import os, shutil +import re +pj = os.path.join + +def 寻找Latex主文件(file_manifest, mode): + """ + 在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。 + P.S. 但愿没人把latex模板放在里面传进来 + """ + for texf in file_manifest: + if os.path.basename(texf).startswith('merge'): + continue + with open(texf, 'r', encoding='utf8') as f: + file_content = f.read() + if r'\documentclass' in file_content: + return texf + else: + continue + raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + +def merge_tex_files_(project_foler, main_file, mode): + """ + 递归地把多Tex工程整合为一个Tex文档 + """ + for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]): + f = s.group(1) + fp = os.path.join(project_foler, f) + if os.path.exists(fp): + # e.g., \input{srcs/07_appendix.tex} + with open(fp, 'r', encoding='utf-8', errors='replace') as fx: + c = fx.read() + else: + # e.g., \input{srcs/07_appendix} + with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx: + c = fx.read() + c = merge_tex_files_(project_foler, c, mode) + main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:] + return main_file + +def merge_tex_files(project_foler, main_file, mode): + """ + 递归地把多Tex工程整合为一个Tex文档(递归外层) + P.S. 顺便把CTEX塞进去以支持中文 + P.S. 顺便把Latex的注释去除 + """ + main_file = merge_tex_files_(project_foler, main_file, mode) + if mode == 'translate_zh': + pattern = re.compile(r'\\documentclass.*\n') + match = pattern.search(main_file) + position = match.end() + main_file = main_file[:position] + '\\usepackage{CTEX}\n\\usepackage{url}\n' + main_file[position:] + + new_file_remove_comment_lines = [] + for l in main_file.splitlines(): + # 删除整行的空注释 + if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + pass + else: + new_file_remove_comment_lines.append(l) + main_file = '\n'.join(new_file_remove_comment_lines) + main_file = re.sub(r'(? None: + self.string = string + self.preserve = preserve + self.next = None + + +def mod_inbraket(match): + """ + 为啥chatgpt会把cite里面的逗号换成中文逗号呀 艹 + """ + # get the matched string + cmd = match.group(1) + str_to_modify = match.group(2) + # modify the matched string + str_to_modify = str_to_modify.replace(':', ':') # 前面是中文冒号,后面是英文冒号 + str_to_modify = str_to_modify.replace(',', ',') # 前面是中文逗号,后面是英文逗号 + # str_to_modify = 'BOOM' + return "\\" + cmd + "{" + str_to_modify + "}" + +def fix_content(final_tex, node_string): + """ + Fix common GPT errors to increase success rate + """ + final_tex = final_tex.replace('%', r'\%') + final_tex = final_tex.replace(r'\%', r'\\%') + final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex) + final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex) + final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex) + if node_string.count('{') != node_string.count('}'): + if final_tex.count('{') != node_string.count('{'): + final_tex = node_string # 出问题了,还原原文 + if final_tex.count('}') != node_string.count('}'): + final_tex = node_string # 出问题了,还原原文 + + return final_tex + +class LatexPaperSplit(): + """ + 将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理 + """ + def __init__(self) -> None: + """ + root是链表的根节点 + """ + self.root = None + + def merge_result(self, arr, mode, msg): + """ + 将GPT处理后的结果融合 + """ + result_string = "" + node = self.root + p = 0 + while True: + if node.preserve: + result_string += node.string + else: + result_string += fix_content(arr[p], node.string) + p += 1 + node = node.next + if node is None: break + if mode == 'translate_zh': + try: + pattern = re.compile(r'\\begin\{abstract\}.*\n') + match = pattern.search(result_string) + position = match.end() + result_string = result_string[:position] + \ + "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成,其内容可靠性没有任何保障,请仔细鉴别并以原文为准。" + \ + "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + \ + msg + \ + "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + \ + result_string[position:] + except: + pass + return result_string + + def split(self, txt, project_folder): + """ + 将Latex文档分解到一个链表中,每个链表节点用preserve的标志位提示它是否应当被GPT处理 + """ + root = LinkedListNode(txt, False) + def split_worker(root, pattern, flags=0): + lt = root + cnt = 0 + pattern_compile = re.compile(pattern, flags) + while True: + if not lt.preserve: + while True: + res = pattern_compile.search(lt.string) + if not res: break + before = res.string[:res.span()[0]] + this = res.group(0) + after = res.string[res.span()[1]:] + # ====== + lt.string = before + tmp = lt.next + # ====== + mid = LinkedListNode(this, True) + lt.next = mid + # ====== + aft = LinkedListNode(after, False) + mid.next = aft + aft.next = tmp + # ====== + lt = aft + lt = lt.next + cnt += 1 + # print(cnt) + if lt is None: break + + def split_worker_begin_end(root, pattern, flags=0, limit_n_lines=25): + lt = root + cnt = 0 + pattern_compile = re.compile(pattern, flags) + while True: + if not lt.preserve: + while True: + target_string = lt.string + + def search_with_line_limit(target_string): + for res in pattern_compile.finditer(target_string): + cmd = res.group(1) # begin{what} + this = res.group(2) # content between begin and end + white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf'] + if cmd in white_list or this.count('\n') > 25: + sub_res = search_with_line_limit(this) + if not sub_res: continue + else: return sub_res + else: + return res.group(0) + return False + # ====== + # search for first encounter of \begin \end pair with less than 25 lines in the middle + ps = search_with_line_limit(target_string) + if not ps: break + res = re.search(re.escape(ps), target_string, flags) + if not res: assert False + before = res.string[:res.span()[0]] + this = res.group(0) + after = res.string[res.span()[1]:] + # ====== + lt.string = before + tmp = lt.next + # ====== + mid = LinkedListNode(this, True) + lt.next = mid + # ====== + aft = LinkedListNode(after, False) + mid.next = aft + aft.next = tmp + # ====== + lt = aft + lt = lt.next + cnt += 1 + # print(cnt) + if lt is None: break + + + # root 是链表的头 + print('正在分解Latex源文件,构建链表结构') + # 删除iffalse注释 + split_worker(root, r"\\iffalse(.*?)\\fi", re.DOTALL) + # 吸收在25行以内的begin-end组合 + split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) + # 吸收匿名公式 + split_worker(root, r"\$\$(.*?)\$\$", re.DOTALL) + # 吸收其他杂项 + split_worker(root, r"(.*?)\\maketitle", re.DOTALL) + split_worker(root, r"\\section\{(.*?)\}") + split_worker(root, r"\\section\*\{(.*?)\}") + split_worker(root, r"\\subsection\{(.*?)\}") + split_worker(root, r"\\subsubsection\{(.*?)\}") + split_worker(root, r"\\bibliography\{(.*?)\}") + split_worker(root, r"\\bibliographystyle\{(.*?)\}") + split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) + split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) + split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) + split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) + split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) + split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) + split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) + split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) + split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) + split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) + split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) + split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) + split_worker(root, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) + split_worker(root, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) + split_worker(root, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) + split_worker(root, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) + split_worker(root, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + split_worker(root, r"\\item ") + split_worker(root, r"\\label\{(.*?)\}") + split_worker(root, r"\\begin\{(.*?)\}") + split_worker(root, r"\\vspace\{(.*?)\}") + split_worker(root, r"\\hspace\{(.*?)\}") + split_worker(root, r"\\end\{(.*?)\}") + + node = root + while True: + if len(node.string.strip('\n').strip(''))==0: node.preserve = True + if len(node.string.strip('\n').strip(''))<50: node.preserve = True + node = node.next + if node is None: break + + # 修复括号 + node = root + while True: + string = node.string + if node.preserve: + node = node.next + if node is None: break + continue + def break_check(string): + str_stack = [""] # (lv, index) + for i, c in enumerate(string): + if c == '{': + str_stack.append('{') + elif c == '}': + if len(str_stack) == 1: + print('stack kill') + return i + str_stack.pop(-1) + else: + str_stack[-1] += c + return -1 + bp = break_check(string) + + if bp == -1: + pass + elif bp == 0: + node.string = string[:1] + q = LinkedListNode(string[1:], False) + q.next = node.next + node.next = q + else: + node.string = string[:bp] + q = LinkedListNode(string[bp:], False) + q.next = node.next + node.next = q + + node = node.next + if node is None: break + + node = root + while True: + if len(node.string.strip('\n').strip(''))==0: node.preserve = True + if len(node.string.strip('\n').strip(''))<50: node.preserve = True + node = node.next + if node is None: break + + # 将前后断行符脱离 + node = root + prev_node = None + while True: + if not node.preserve: + lstriped_ = node.string.lstrip().lstrip('\n') + if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): + prev_node.string += node.string[:-len(lstriped_)] + node.string = lstriped_ + rstriped_ = node.string.rstrip().rstrip('\n') + if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): + node.next.string = node.string[len(rstriped_):] + node.next.string + node.string = rstriped_ + # ===== + prev_node = node + node = node.next + if node is None: break + + # 将分解结果返回 res_to_t + with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: + res_to_t = [] + node = root + while True: + show_html = node.string.replace('\n','
') + if not node.preserve: + res_to_t.append(node.string) + f.write(f'

#{show_html}#

') + else: + f.write(f'

{show_html}

') + node = node.next + if node is None: break + + self.root = root + self.sp = res_to_t + return self.sp + +class LatexPaperFileGroup(): + def __init__(self): + self.file_paths = [] + self.file_contents = [] + self.sp_file_contents = [] + self.sp_file_index = [] + self.sp_file_tag = [] + + # count_token + from request_llm.bridge_all import model_info + enc = model_info["gpt-3.5-turbo"]['tokenizer'] + def get_token_num(txt): return len(enc.encode(txt, disallowed_special=())) + self.get_token_num = get_token_num + + def run_file_split(self, max_token_limit=1900): + """ + 将长文本分离开来 + """ + for index, file_content in enumerate(self.file_contents): + if self.get_token_num(file_content) < max_token_limit: + self.sp_file_contents.append(file_content) + self.sp_file_index.append(index) + self.sp_file_tag.append(self.file_paths[index]) + else: + from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf + segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit) + for j, segment in enumerate(segments): + self.sp_file_contents.append(segment) + self.sp_file_index.append(index) + self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex") + print('Segmentation: done') + + def merge_result(self): + self.file_result = ["" for _ in range(len(self.file_paths))] + for r, k in zip(self.sp_file_result, self.sp_file_index): + self.file_result[k] += r + + def write_result(self): + manifest = [] + for path, res in zip(self.file_paths, self.file_result): + with open(path + '.polish.tex', 'w', encoding='utf8') as f: + manifest.append(path + '.polish.tex') + f.write(res) + return manifest + + + +def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None): + import time, os, re + from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency + from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件 + + # <-------- 寻找主tex文件 ----------> + maintex = 寻找Latex主文件(file_manifest, mode) + chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + time.sleep(5) + + # <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ----------> + main_tex_basename = os.path.basename(maintex) + assert main_tex_basename.endswith('.tex') + main_tex_basename_bare = main_tex_basename[:-4] + may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl') + if os.path.exists(may_exist_bbl): + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl')) + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl')) + shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl')) + + with open(maintex, 'r', encoding='utf-8', errors='replace') as f: + content = f.read() + merged_content = merge_tex_files(project_folder, content, mode) + + with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f: + f.write(merged_content) + + # <-------- 精细切分latex文件 ----------> + lps = LatexPaperSplit() + res = lps.split(merged_content, project_folder) + + # <-------- 拆分过长的latex片段 ----------> + pfg = LatexPaperFileGroup() + for index, r in enumerate(res): + pfg.file_paths.append('segment-' + str(index)) + pfg.file_contents.append(r) + + pfg.run_file_split(max_token_limit=1024) + n_split = len(pfg.sp_file_contents) + + # <-------- 根据需要切换prompt ----------> + inputs_array, sys_prompt_array = switch_prompt(pfg, mode) + inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag] + + if os.path.exists(pj(project_folder,'temp.pkl')): + + # <-------- 【仅调试】如果存在调试缓存文件,则跳过GPT请求环节 ----------> + pfg = objload(file=pj(project_folder,'temp.pkl')) + + else: + # <-------- gpt 多线程请求 ----------> + gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency( + inputs_array=inputs_array, + inputs_show_user_array=inputs_show_user_array, + llm_kwargs=llm_kwargs, + chatbot=chatbot, + history_array=[[""] for _ in range(n_split)], + sys_prompt_array=sys_prompt_array, + # max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待 + scroller_max_len = 40 + ) + + # <-------- 文本碎片重组为完整的tex片段 ----------> + pfg.sp_file_result = [] + for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents): + pfg.sp_file_result.append(gpt_say) + pfg.merge_result() + + # <-------- 临时存储用于调试 ----------> + pfg.get_token_num = None + objdump(pfg, file=pj(project_folder,'temp.pkl')) + + + # <-------- 写出文件 ----------> + msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" + final_tex = lps.merge_result(pfg.file_result, mode, msg) + with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f: + f.write(final_tex) + + # <-------- 整理结果, 退出 ----------> + chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # <-------- 返回 ----------> + return project_folder + f'/merge_{mode}.tex' + + + +def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified): + try: + with open(log_path, 'r', encoding='utf-8', errors='replace') as f: + log = f.read() + with open(file_path, 'r', encoding='utf-8', errors='replace') as f: + file_lines = f.readlines() + import re + buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log) + buggy_lines = [int(l) for l in buggy_lines] + buggy_lines = sorted(buggy_lines) + print("removing lines that has errors", buggy_lines) + file_lines.pop(buggy_lines[0]-1) + with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f: + f.writelines(file_lines) + return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines + except: + return False, 0, [0] + + +def compile_latex_with_timeout(command, timeout=60): + import subprocess + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + try: + stdout, stderr = process.communicate(timeout=timeout) + except subprocess.TimeoutExpired: + process.kill() + stdout, stderr = process.communicate() + print("Process timed out!") + return False + return True + +def 编译Latex差别(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder): + import os, time + current_dir = os.getcwd() + n_fix = 1 + max_try = 32 + chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) + chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面 + yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面 + + while True: + import os + # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + + if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): + # 只有第二步成功,才能继续下面的步骤 + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面 + if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')): + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir) + if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')): + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir) + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 + print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + + # <---------------------> + os.chdir(current_dir) + + # <---------- 检查结果 -----------> + results_ = "" + original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf')) + modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')) + diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf')) + results_ += f"原始PDF编译是否成功: {original_pdf_success};" + results_ += f"转化PDF编译是否成功: {modified_pdf_success};" + results_ += f"对比PDF编译是否成功: {diff_pdf_success};" + yield from update_ui_lastest_msg(f'第{n_fix}编译结束:
{results_}...', chatbot, history) # 刷新Gradio前端界面 + + if modified_pdf_success: + yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(current_dir) + result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') + if os.path.exists(pj(work_folder, '..', 'translation')): + shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) + promote_file_to_downloadzone(result_pdf) + return True # 成功啦 + else: + if n_fix>=max_try: break + n_fix += 1 + can_retry, main_file_modified, buggy_lines = remove_buggy_lines( + file_path=pj(work_folder_modified, f'{main_file_modified}.tex'), + log_path=pj(work_folder_modified, f'{main_file_modified}.log'), + tex_name=f'{main_file_modified}.tex', + tex_name_pure=f'{main_file_modified}', + n_fix=n_fix, + work_folder_modified=work_folder_modified, + ) + yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面 + if not can_retry: break + + os.chdir(current_dir) + return False # 失败啦 + + + diff --git a/docs/README.md.Italian.md b/docs/README.md.Italian.md index 04c5ed6..301ce69 100644 --- a/docs/README.md.Italian.md +++ b/docs/README.md.Italian.md @@ -2,11 +2,11 @@ > > Durante l'installazione delle dipendenze, selezionare rigorosamente le **versioni specificate** nel file requirements.txt. > -> ` pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` +> ` pip install -r requirements.txt` -# GPT Ottimizzazione Accademica (GPT Academic) +# GPT Ottimizzazione Accademica (GPT Academic) -**Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](docs/README_EN.md)[Giapponese|](docs/README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](docs/README_RS.md)[Francese](docs/README_FR.md) tradotta da questo stesso progetto. +**Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](README_EN.md)[Giapponese|](README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](README_RS.md)[Francese](README_FR.md) tradotta da questo stesso progetto. Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`multi_language.py`](multi_language.py) (sperimentale). > **Nota** @@ -17,7 +17,9 @@ Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`m > > 3. Questo progetto è compatibile e incoraggia l'utilizzo di grandi modelli di linguaggio di produzione nazionale come chatglm, RWKV, Pangu ecc. Supporta la coesistenza di più api-key e può essere compilato nel file di configurazione come `API_KEY="openai-key1,openai-key2,api2d-key3"`. Per sostituire temporaneamente `API_KEY`, inserire `API_KEY` temporaneo nell'area di input e premere Invio per renderlo effettivo. -
Funzione | Descrizione +
+ +Funzione | Descrizione --- | --- Correzione immediata | Supporta correzione immediata e ricerca degli errori di grammatica del documento con un solo clic Traduzione cinese-inglese immediata | Traduzione cinese-inglese immediata con un solo clic @@ -41,6 +43,8 @@ Avvia il tema di gradio [scuro](https://github.com/binary-husky/chatgpt_academic Supporto per maggiori modelli LLM, supporto API2D | Sentirsi serviti simultaneamente da GPT3.5, GPT4, [Tsinghua ChatGLM](https://github.com/THUDM/ChatGLM-6B), [Fudan MOSS](https://github.com/OpenLMLab/MOSS) deve essere una grande sensazione, giusto? Ulteriori modelli LLM supportat,i supporto per l'implementazione di Huggingface | Aggiunta di un'interfaccia Newbing (Nuovo Bing), introdotta la compatibilità con Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs), [LLaMA](https://github.com/facebookresearch/llama), [RWKV](https://github.com/BlinkDL/ChatRWKV) e [PanGu-α](https://openi.org.cn/pangu/) Ulteriori dimostrazioni di nuove funzionalità (generazione di immagini, ecc.)... | Vedere la fine di questo documento... +
+ - Nuova interfaccia (modificare l'opzione LAYOUT in `config.py` per passare dal layout a sinistra e a destra al layout superiore e inferiore)
@@ -202,11 +206,13 @@ ad esempio 2. Plugin di funzione personalizzati Scrivi plugin di funzione personalizzati e esegui tutte le attività che desideri o non hai mai pensato di fare. -La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni] (https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97). +La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97). --- # Ultimo aggiornamento -## Nuove funzionalità dinamiche1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html. +## Nuove funzionalità dinamiche + +1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html.
@@ -307,4 +313,4 @@ https://github.com/kaixindelele/ChatPaper # Altro: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/docs/README.md.Korean.md b/docs/README.md.Korean.md index d94aaf1..f15d477 100644 --- a/docs/README.md.Korean.md +++ b/docs/README.md.Korean.md @@ -17,7 +17,9 @@ GPT를 이용하여 프로젝트를 임의의 언어로 번역하려면 [`multi_ > > 3. 이 프로젝트는 국내 언어 모델 chatglm과 RWKV, 판고 등의 시도와 호환 가능합니다. 여러 개의 api-key를 지원하며 설정 파일에 "API_KEY="openai-key1,openai-key2,api2d-key3""와 같이 작성할 수 있습니다. `API_KEY`를 임시로 변경해야하는 경우 입력 영역에 임시 `API_KEY`를 입력 한 후 엔터 키를 누르면 즉시 적용됩니다. -
기능 | 설명 +
+ +기능 | 설명 --- | --- 원 키워드 | 원 키워드 및 논문 문법 오류를 찾는 기능 지원 한-영 키워드 | 한-영 키워드 지원 @@ -265,4 +267,4 @@ https://github.com/kaixindelele/ChatPaper # 더 많은 : https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/docs/README.md.Portuguese.md b/docs/README.md.Portuguese.md index 816ced1..98f6054 100644 --- a/docs/README.md.Portuguese.md +++ b/docs/README.md.Portuguese.md @@ -2,7 +2,7 @@ > > Ao instalar as dependências, por favor, selecione rigorosamente as versões **especificadas** no arquivo requirements.txt. > -> `pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` +> `pip install -r requirements.txt` > # Otimização acadêmica GPT (GPT Academic) @@ -18,7 +18,9 @@ Para traduzir este projeto para qualquer idioma com o GPT, leia e execute [`mult > > 3. Este projeto é compatível com e incentiva o uso de modelos de linguagem nacionais, como chatglm e RWKV, Pangolin, etc. Suporta a coexistência de várias chaves de API e pode ser preenchido no arquivo de configuração como `API_KEY="openai-key1,openai-key2,api2d-key3"`. Quando precisar alterar temporariamente o `API_KEY`, basta digitar o `API_KEY` temporário na área de entrada e pressionar Enter para que ele entre em vigor. -
Funcionalidade | Descrição +
+ +Funcionalidade | Descrição --- | --- Um clique de polimento | Suporte a um clique polimento, um clique encontrar erros de gramática no artigo Tradução chinês-inglês de um clique | Tradução chinês-inglês de um clique @@ -216,7 +218,9 @@ Para mais detalhes, consulte o [Guia do plug-in de função.](https://github.com --- # Última atualização -## Novas funções dinâmicas.1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html. +## Novas funções dinâmicas. + +1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html.
@@ -317,4 +321,4 @@ https://github.com/kaixindelele/ChatPaper # Mais: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -``` \ No newline at end of file +``` diff --git a/main.py b/main.py index b9abf0e..da3625e 100644 --- a/main.py +++ b/main.py @@ -2,7 +2,7 @@ import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 def main(): import gradio as gr - if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "请用 pip install -r requirements.txt 安装依赖" + if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt" from request_llm.bridge_all import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 diff --git a/toolbox.py b/toolbox.py index 0b66d11..54b5aea 100644 --- a/toolbox.py +++ b/toolbox.py @@ -1,6 +1,6 @@ import markdown import importlib -import traceback +import time import inspect import re import gradio as gr @@ -102,6 +102,17 @@ def update_ui(chatbot, history, msg='正常', *args): # 刷新界面 yield chatbot.get_cookies(), chatbot, history, msg pool.submit(func_box.thread_write_chat, chatbot) +def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面 + """ + 刷新用户界面 + """ + if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg]) + chatbot[-1] = list(chatbot[-1]) + chatbot[-1][-1] = lastmsg + yield from update_ui(chatbot=chatbot, history=history) + time.sleep(delay) + + def trimmed_format_exc(): import os, traceback str = traceback.format_exc() @@ -115,7 +126,7 @@ def CatchException(f): """ @wraps(f) - def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT): + def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1): try: yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT) except Exception as e: @@ -458,6 +469,13 @@ def find_recent_files(directory): return recent_files +def promote_file_to_downloadzone(file, rename_file=None): + # 将文件复制一份到下载区 + import shutil + if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' + new_path = os.path.join(f'./gpt_log/', rename_file) + if os.path.exists(new_path): os.remove(new_path) + shutil.copyfile(file, new_path) def get_user_upload(chatbot, ipaddr: gr.Request): @@ -807,6 +825,8 @@ def clip_history(inputs, history, tokenizer, max_token_limit): 其他小工具: - zip_folder: 把某个路径下所有文件压缩,然后转移到指定的另一个路径中(gpt写的) - gen_time_str: 生成时间戳 + - ProxyNetworkActivate: 临时地启动代理网络(如果有) + - objdump/objload: 快捷的调试函数 ======================================================================== """ @@ -841,11 +861,15 @@ def zip_folder(source_folder, dest_folder, zip_name): print(f"Zip file created at {zip_file}") +def zip_result(folder): + import time + t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + zip_folder(folder, './gpt_log/', f'{t}-result.zip') + def gen_time_str(): import time return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) - class ProxyNetworkActivate(): """ 这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理 @@ -854,12 +878,27 @@ class ProxyNetworkActivate(): from toolbox import get_conf proxies, = get_conf('proxies') if 'no_proxy' in os.environ: os.environ.pop('no_proxy') - os.environ['HTTP_PROXY'] = proxies['http'] - os.environ['HTTPS_PROXY'] = proxies['https'] + if proxies is not None: + if 'http' in proxies: os.environ['HTTP_PROXY'] = proxies['http'] + if 'https' in proxies: os.environ['HTTPS_PROXY'] = proxies['https'] return self def __exit__(self, exc_type, exc_value, traceback): os.environ['no_proxy'] = '*' if 'HTTP_PROXY' in os.environ: os.environ.pop('HTTP_PROXY') if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY') - return \ No newline at end of file + return + +def objdump(obj, file='objdump.tmp'): + import pickle + with open(file, 'wb+') as f: + pickle.dump(obj, f) + return + +def objload(file='objdump.tmp'): + import pickle, os + if not os.path.exists(file): + return + with open(file, 'rb') as f: + return pickle.load(f) + \ No newline at end of file