From f3205994ea670f299b2a4a5a8135f5501f8319ff Mon Sep 17 00:00:00 2001 From: XiaojianTang <277172719@qq.com> Date: Fri, 26 May 2023 23:22:12 +0800 Subject: [PATCH 01/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0azure=20openai=20api?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 7 + request_llm/bridge_all.py | 13 ++ request_llm/bridge_azure_test.py | 241 +++++++++++++++++++++++++++++++ 3 files changed, 261 insertions(+) create mode 100644 request_llm/bridge_azure_test.py diff --git a/config.py b/config.py index 14b089e..afaf6a8 100644 --- a/config.py +++ b/config.py @@ -1,6 +1,13 @@ # [step 1]>> 例如: API_KEY = "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" (此key无效) API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2" +#增加关于AZURE的配置信息, 可以在AZURE网页中找到 +AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/" +AZURE_API_KEY = "填入azure openai api的密钥" +AZURE_API_VERSION = "填入api版本" +AZURE_ENGINE = "填入ENGINE" + + # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 USE_PROXY = False if USE_PROXY: diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index b6efe21..25c5a8b 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -16,6 +16,9 @@ from toolbox import get_conf, trimmed_format_exc from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui from .bridge_chatgpt import predict as chatgpt_ui +from .bridge_azure_test import predict_no_ui_long_connection as azure_noui +from .bridge_azure_test import predict as azure_ui + from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui from .bridge_chatglm import predict as chatglm_ui @@ -93,6 +96,16 @@ model_info = { "token_cnt": get_token_num_gpt4, }, + # azure openai + "azure-gpt35":{ + "fn_with_ui": azure_ui, + "fn_without_ui": azure_noui, + "endpoint": get_conf("AZURE_ENDPOINT"), + "max_token": 4096, + 
"tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + # api_2d "api2d-gpt-3.5-turbo": { "fn_with_ui": chatgpt_ui, diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py new file mode 100644 index 0000000..edc68f7 --- /dev/null +++ b/request_llm/bridge_azure_test.py @@ -0,0 +1,241 @@ +""" + 该文件中主要包含三个函数 + + 不具备多线程能力的函数: + 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程 + + 具备多线程调用能力的函数 + 2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑 + 3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程 +""" + +import logging +import traceback +import importlib +import openai +import time + + +# 读取config.py文件中关于AZURE OPENAI API的信息 +from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc +TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \ + get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY") + + +def get_full_error(chunk, stream_response): + """ + 获取完整的从Openai返回的报错 + """ + while True: + try: + chunk += next(stream_response) + except: + break + return chunk + +def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None): + """ + 发送至azure openai api,流式获取输出。 + 用于基础的对话功能。 + inputs 是本次问询的输入 + top_p, temperature是chatGPT的内部调优参数 + history 是之前的对话列表(注意无论是inputs还是history,内容太长了都会触发token数量溢出的错误) + chatbot 为WebUI中显示的对话列表,修改它,然后yeild出去,可以直接修改对话界面内容 + additional_fn代表点击的哪个按钮,按钮见functional.py + """ + print(llm_kwargs["llm_model"]) + + if additional_fn is not None: + import core_functional + importlib.reload(core_functional) # 热更新prompt + core_functional = core_functional.get_core_functions() + if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话) + inputs = core_functional[additional_fn]["Prefix"] + inputs + 
core_functional[additional_fn]["Suffix"] + + raw_input = inputs + logging.info(f'[raw_input] {raw_input}') + chatbot.append((inputs, "")) + yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面 + + + payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream) + + history.append(inputs); history.append("") + + retry = 0 + while True: + try: + + openai.api_type = "azure" + openai.api_version = AZURE_API_VERSION + openai.api_base = AZURE_ENDPOINT + openai.api_key = AZURE_API_KEY + response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break + + except: + retry += 1 + chatbot[-1] = ((chatbot[-1][0], "获取response失败,重试中。。。")) + retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else "" + yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面 + if retry > MAX_RETRY: raise TimeoutError + + gpt_replying_buffer = "" + is_head_of_the_stream = True + if stream: + + stream_response = response + + while True: + try: + chunk = next(stream_response) + + except StopIteration: + from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```' + chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}") + yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # 刷新界面 + return + + if is_head_of_the_stream and (r'"object":"error"' not in chunk): + # 数据流的第一帧不携带content + is_head_of_the_stream = False; continue + + if chunk: + #print(chunk) + try: + if "delta" in chunk["choices"][0]: + if chunk["choices"][0]["finish_reason"] == "stop": + logging.info(f'[response] {gpt_replying_buffer}') + break + status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}" + gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"] + + history[-1] = gpt_replying_buffer + chatbot[-1] = (history[-2], history[-1]) + yield from update_ui(chatbot=chatbot, history=history, 
msg=status_text) # 刷新界面 + + except Exception as e: + traceback.print_exc() + yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面 + chunk = get_full_error(chunk, stream_response) + + error_msg = chunk + yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 + return + + +def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): + """ + 发送至AZURE OPENAI API,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。 + inputs: + 是本次问询的输入 + sys_prompt: + 系统静默prompt + llm_kwargs: + chatGPT的内部调优参数 + history: + 是之前的对话列表 + observe_window = None: + 用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗 + """ + watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 + payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) + retry = 0 + while True: + + try: + openai.api_type = "azure" + openai.api_version = AZURE_API_VERSION + openai.api_base = AZURE_ENDPOINT + openai.api_key = AZURE_API_KEY + response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break + + except: + retry += 1 + traceback.print_exc() + if retry > MAX_RETRY: raise TimeoutError + if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') + + + stream_response = response + result = '' + while True: + try: chunk = next(stream_response) + except StopIteration: + break + except: + chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。 + + if len(chunk)==0: continue + if not chunk.startswith('data:'): + error_msg = get_full_error(chunk, stream_response) + if "reduce the length" in error_msg: + raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg) + else: + raise RuntimeError("AZURE OPENAI API拒绝了请求:" + error_msg) + if ('data: [DONE]' in chunk): break + + delta = chunk["delta"] + if len(delta) == 0: break + if "role" in delta: continue + if "content" in delta: + result += delta["content"] + if not console_slience: 
print(delta["content"], end='') + if observe_window is not None: + # 观测窗,把已经获取的数据显示出去 + if len(observe_window) >= 1: observe_window[0] += delta["content"] + # 看门狗,如果超过期限没有喂狗,则终止 + if len(observe_window) >= 2: + if (time.time()-observe_window[1]) > watch_dog_patience: + raise RuntimeError("用户取消了程序。") + else: raise RuntimeError("意外Json结构:"+delta) + if chunk['finish_reason'] == 'length': + raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。") + return result + + +def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream): + """ + 整合所有信息,选择LLM模型,生成 azure openai api请求,为发送请求做准备 + """ + + conversation_cnt = len(history) // 2 + + messages = [{"role": "system", "content": system_prompt}] + if conversation_cnt: + for index in range(0, 2*conversation_cnt, 2): + what_i_have_asked = {} + what_i_have_asked["role"] = "user" + what_i_have_asked["content"] = history[index] + what_gpt_answer = {} + what_gpt_answer["role"] = "assistant" + what_gpt_answer["content"] = history[index+1] + if what_i_have_asked["content"] != "": + if what_gpt_answer["content"] == "": continue + messages.append(what_i_have_asked) + messages.append(what_gpt_answer) + else: + messages[-1]['content'] = what_gpt_answer['content'] + + what_i_ask_now = {} + what_i_ask_now["role"] = "user" + what_i_ask_now["content"] = inputs + messages.append(what_i_ask_now) + + payload = { + "model": llm_kwargs['llm_model'], + "messages": messages, + "temperature": llm_kwargs['temperature'], # 1.0, + "top_p": llm_kwargs['top_p'], # 1.0, + "n": 1, + "stream": stream, + "presence_penalty": 0, + "frequency_penalty": 0, + "engine": AZURE_ENGINE + } + try: + print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") + except: + print('输入中可能存在乱码。') + return payload + + From c65def90f3bcc152113580488a176e2f232fa140 Mon Sep 17 00:00:00 2001 From: MengDanzz <95761983+MengDanzz@users.noreply.github.com> Date: Tue, 6 Jun 2023 14:36:30 +0800 Subject: [PATCH 02/78] 
=?UTF-8?q?=E5=B0=86Dockerfile=20COPY=E5=88=86?= =?UTF-8?q?=E6=88=90=E4=B8=A4=E6=AE=B5=EF=BC=8C=E7=BC=93=E5=AD=98=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E5=BA=93=EF=BC=8C=E9=87=8D=E6=96=B0=E6=9E=84=E5=BB=BA?= =?UTF-8?q?=E4=B8=8D=E9=9C=80=E8=A6=81=E9=87=8D=E6=96=B0=E5=AE=89=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 19d988f..aa4eee8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,12 +10,14 @@ RUN echo '[global]' > /etc/pip.conf && \ WORKDIR /gpt -# 装载项目文件 -COPY . . + + # 安装依赖 +COPY requirements.txt ./ RUN pip3 install -r requirements.txt - +# 装载项目文件 +COPY . . # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From 40da1b0afefd42bb27255e6980ac9b3fd43d7654 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 6 Jun 2023 18:44:00 +0800 Subject: [PATCH 03/78] =?UTF-8?q?=E5=B0=86Latex=E5=88=86=E8=A7=A3=E7=A8=8B?= =?UTF-8?q?=E5=BA=8F=E6=94=BE=E5=88=B0=E5=AD=90=E8=BF=9B=E7=A8=8B=E6=89=A7?= =?UTF-8?q?=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 15 ++++- crazy_functions/crazy_functions_test.py | 8 +-- crazy_functions/latex_utils.py | 84 +++++++++++++++++-------- docs/Dockerfile+NoLocal+Latex | 21 ++----- 4 files changed, 80 insertions(+), 48 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index ecba82b..855cc1c 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -82,7 +82,14 @@ def arxiv_download(chatbot, history, txt): promote_file_to_downloadzone(target_file) return target_file return False - + def is_float(s): + try: + float(s) + return True + except ValueError: + return False + if ('.' 
in txt) and ('/' not in txt) and is_float(txt): + txt = 'https://arxiv.org/abs/' + txt if not txt.startswith('https://arxiv.org'): return txt, None @@ -198,7 +205,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- information about this plugin -------------> chatbot.append([ "函数插件功能?", - "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 @@ -221,6 +228,8 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return + + if os.path.exists(txt): project_folder = txt else: @@ -228,6 +237,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}") yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return + file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] if len(file_manifest) == 0: report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}") @@ -261,5 +271,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + # <-------------- we are done -------------> return success diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index d19d653..e743878 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -182,13 +182,13 @@ def test_Langchain知识库读取(): def test_Latex(): from 
crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF - txt = r"https://arxiv.org/abs/1706.03762" + # txt = r"https://arxiv.org/abs/1706.03762" # txt = r"https://arxiv.org/abs/1902.03185" # txt = r"https://arxiv.org/abs/2305.18290" # txt = r"https://arxiv.org/abs/2305.17608" - # txt = r"https://arxiv.org/abs/2211.16068" # ACE - # txt = r"C:\Users\fuqingxu\arxiv_cache\2211.16068\workfolder" # ACE - + # txt = r"https://arxiv.org/abs/2211.16068" # ACE + # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE + txt = r"https://arxiv.org/abs/2002.09253" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index b490b5c..15dfebc 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -61,8 +61,8 @@ class LinkedListNode(): self.string = string self.preserve = preserve self.next = None - self.begin_line = 0 - self.begin_char = 0 + # self.begin_line = 0 + # self.begin_char = 0 def convert_to_linklist(text, mask): root = LinkedListNode("", preserve=True) @@ -97,11 +97,22 @@ def 寻找Latex主文件(file_manifest, mode): else: continue raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') - +def rm_comments(main_file): + new_file_remove_comment_lines = [] + for l in main_file.splitlines(): + # 删除整行的空注释 + if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + pass + else: + new_file_remove_comment_lines.append(l) + main_file = '\n'.join(new_file_remove_comment_lines) + main_file = re.sub(r'(? 
None: - """ - root是链表的根节点 - """ - self.root = None + self.nodes = None self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" @@ -212,16 +212,13 @@ class LatexPaperSplit(): Merge the result after the GPT process completed """ result_string = "" - node = self.root p = 0 - while True: + for node in self.nodes: if node.preserve: result_string += node.string else: result_string += fix_content(arr[p], node.string) p += 1 - node = node.next - if node is None: break if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) @@ -229,7 +226,27 @@ class LatexPaperSplit(): result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string - def split(self, txt, project_folder): + def split(self, txt, project_folder): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. + P.S. 
use multiprocessing to avoid timeout error + """ + import multiprocessing + manager = multiprocessing.Manager() + return_dict = manager.dict() + p = multiprocessing.Process( + target=lambda lps, txt, project_folder, return_dict: + lps.split_subprocess(txt, project_folder, return_dict), + args=(self, txt, project_folder, return_dict)) + p.start() + p.join() + self.nodes = return_dict['nodes'] + self.sp = return_dict['segment_parts_for_gpt'] + return self.sp + + def split_subprocess(self, txt, project_folder, return_dict): """ break down latex file to a linked list, each node use a preserve flag to indicate whether it should @@ -318,12 +335,20 @@ class LatexPaperSplit(): node = node.next if node is None: break + # 屏蔽空行和太短的句子 node = root while True: if len(node.string.strip('\n').strip(''))==0: node.preserve = True if len(node.string.strip('\n').strip(''))<42: node.preserve = True node = node.next if node is None: break + node = root + while True: + if node.next and node.preserve and node.next.preserve: + node.string += node.next.string + node.next = node.next.next + node = node.next + if node is None: break # 将前后断行符脱离 node = root @@ -345,8 +370,10 @@ class LatexPaperSplit(): with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: segment_parts_for_gpt = [] + nodes = [] node = root while True: + nodes.append(node) show_html = node.string.replace('\n','
') if not node.preserve: segment_parts_for_gpt.append(node.string) @@ -355,9 +382,11 @@ class LatexPaperSplit(): f.write(f'

{show_html}

') node = node.next if node is None: break - self.root = root - self.sp = segment_parts_for_gpt - return self.sp + + for n in nodes: n.next = None # break + return_dict['nodes'] = nodes + return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt + return return_dict class LatexPaperFileGroup(): """ @@ -439,7 +468,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin # <-------- 精细切分latex文件 ----------> lps = LatexPaperSplit() - res = lps.split(merged_content, project_folder) + res = lps.split(merged_content, project_folder) # 消耗时间的函数 # <-------- 拆分过长的latex片段 ----------> pfg = LatexPaperFileGroup() @@ -515,7 +544,8 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work f.writelines(file_lines) return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines except: - return False, 0, [0] + print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") + return False, -1, [-1] def compile_latex_with_timeout(command, timeout=60): diff --git a/docs/Dockerfile+NoLocal+Latex b/docs/Dockerfile+NoLocal+Latex index 428dbc0..0f9ac8a 100644 --- a/docs/Dockerfile+NoLocal+Latex +++ b/docs/Dockerfile+NoLocal+Latex @@ -8,26 +8,17 @@ FROM fuqingxu/python311_texlive_ctex:latest # 指定路径 WORKDIR /gpt +ARG useProxyNetwork='' + +RUN $useProxyNetwork pip3 install gradio openai numpy arxiv rich -i https://pypi.douban.com/simple/ +RUN $useProxyNetwork pip3 install colorama Markdown pygments pymupdf -i https://pypi.douban.com/simple/ + # 装载项目文件 COPY . . 
-ARG useProxyNetwork='' - - -# # # comment out below if you do not need proxy network | 如果不需要翻墙 - 从此行向下删除 -# RUN apt-get update -# RUN apt-get install -y curl proxychains -# RUN $useProxyNetwork curl cip.cc -# RUN sed -i '$ d' /etc/proxychains.conf -# RUN sed -i '$ d' /etc/proxychains.conf -# RUN echo "socks5 127.0.0.1 10880" >> /etc/proxychains.conf -# ARG useProxyNetwork=proxychains -# # # comment out above if you do not need proxy network | 如果不需要翻墙 - 从此行向上删除 - - # 安装依赖 -RUN $useProxyNetwork pip3 install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple +RUN $useProxyNetwork pip3 install -r requirements.txt -i https://pypi.douban.com/simple/ # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From 8ef734410160f2b8090a2ec10b15069ee60da9b7 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Tue, 6 Jun 2023 18:57:52 +0800 Subject: [PATCH 04/78] fix subprocess bug in Windows --- crazy_functions/latex_utils.py | 288 +++++++++++++++++---------------- 1 file changed, 145 insertions(+), 143 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 15dfebc..d3d7b9c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -192,6 +192,149 @@ def fix_content(final_tex, node_string): final_tex = node_string # 出问题了,还原原文 return final_tex +def split_subprocess(txt, project_folder, return_dict): + """ + break down latex file to a linked list, + each node use a preserve flag to indicate whether it should + be proccessed by GPT. 
+ """ + text = txt + mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM + + # 吸收title与作者以上的部分 + text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) + # 删除iffalse注释 + text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) + # 吸收在25行以内的begin-end组合 + text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) + # 吸收匿名公式 + text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) + # 吸收其他杂项 + text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) + text, mask = 
split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\item ") + text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") + # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) + root = convert_to_linklist(text, mask) + + # 修复括号 + node = root + while True: + string = node.string + if node.preserve: + node = node.next + if node is None: break + continue + def break_check(string): + str_stack = [""] # (lv, index) + for i, c in enumerate(string): + if c == '{': + str_stack.append('{') + elif c == '}': + if len(str_stack) == 1: + print('stack fix') + return i + str_stack.pop(-1) + else: + str_stack[-1] += c + return -1 + bp = break_check(string) + + if bp == -1: + pass + elif bp == 0: + node.string = string[:1] + q = LinkedListNode(string[1:], False) + q.next = node.next + node.next = q + else: + node.string = string[:bp] + q = LinkedListNode(string[bp:], False) + q.next = node.next + node.next = q + + node = node.next + if node is None: break + + # 屏蔽空行和太短的句子 + node = root + while True: + if len(node.string.strip('\n').strip(''))==0: node.preserve = True + if len(node.string.strip('\n').strip(''))<42: node.preserve = True + node = node.next + if 
node is None: break + node = root + while True: + if node.next and node.preserve and node.next.preserve: + node.string += node.next.string + node.next = node.next.next + node = node.next + if node is None: break + + # 将前后断行符脱离 + node = root + prev_node = None + while True: + if not node.preserve: + lstriped_ = node.string.lstrip().lstrip('\n') + if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): + prev_node.string += node.string[:-len(lstriped_)] + node.string = lstriped_ + rstriped_ = node.string.rstrip().rstrip('\n') + if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): + node.next.string = node.string[len(rstriped_):] + node.next.string + node.string = rstriped_ + # ===== + prev_node = node + node = node.next + if node is None: break + + with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: + segment_parts_for_gpt = [] + nodes = [] + node = root + while True: + nodes.append(node) + show_html = node.string.replace('\n','
') + if not node.preserve: + segment_parts_for_gpt.append(node.string) + f.write(f'

#{show_html}#

') + else: + f.write(f'

{show_html}

') + node = node.next + if node is None: break + + for n in nodes: n.next = None # break + return_dict['nodes'] = nodes + return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt + return return_dict + + class LatexPaperSplit(): """ @@ -237,156 +380,15 @@ class LatexPaperSplit(): manager = multiprocessing.Manager() return_dict = manager.dict() p = multiprocessing.Process( - target=lambda lps, txt, project_folder, return_dict: - lps.split_subprocess(txt, project_folder, return_dict), - args=(self, txt, project_folder, return_dict)) + target=split_subprocess, + args=(txt, project_folder, return_dict)) p.start() p.join() self.nodes = return_dict['nodes'] self.sp = return_dict['segment_parts_for_gpt'] return self.sp - def split_subprocess(self, txt, project_folder, return_dict): - """ - break down latex file to a linked list, - each node use a preserve flag to indicate whether it should - be proccessed by GPT. - """ - text = txt - mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM - # 吸收title与作者以上的部分 - text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) - # 删除iffalse注释 - text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) - # 吸收在25行以内的begin-end组合 - text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) - # 吸收匿名公式 - text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) - # 吸收其他杂项 - text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) - text, mask = split_worker(text, mask, 
r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\item ") - text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") - # text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) - root = 
convert_to_linklist(text, mask) - - # 修复括号 - node = root - while True: - string = node.string - if node.preserve: - node = node.next - if node is None: break - continue - def break_check(string): - str_stack = [""] # (lv, index) - for i, c in enumerate(string): - if c == '{': - str_stack.append('{') - elif c == '}': - if len(str_stack) == 1: - print('stack fix') - return i - str_stack.pop(-1) - else: - str_stack[-1] += c - return -1 - bp = break_check(string) - - if bp == -1: - pass - elif bp == 0: - node.string = string[:1] - q = LinkedListNode(string[1:], False) - q.next = node.next - node.next = q - else: - node.string = string[:bp] - q = LinkedListNode(string[bp:], False) - q.next = node.next - node.next = q - - node = node.next - if node is None: break - - # 屏蔽空行和太短的句子 - node = root - while True: - if len(node.string.strip('\n').strip(''))==0: node.preserve = True - if len(node.string.strip('\n').strip(''))<42: node.preserve = True - node = node.next - if node is None: break - node = root - while True: - if node.next and node.preserve and node.next.preserve: - node.string += node.next.string - node.next = node.next.next - node = node.next - if node is None: break - - # 将前后断行符脱离 - node = root - prev_node = None - while True: - if not node.preserve: - lstriped_ = node.string.lstrip().lstrip('\n') - if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)): - prev_node.string += node.string[:-len(lstriped_)] - node.string = lstriped_ - rstriped_ = node.string.rstrip().rstrip('\n') - if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)): - node.next.string = node.string[len(rstriped_):] + node.next.string - node.string = rstriped_ - # ===== - prev_node = node - node = node.next - if node is None: break - - with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f: - segment_parts_for_gpt = [] - nodes = [] - node = root - while True: - nodes.append(node) - show_html = 
node.string.replace('\n','
') - if not node.preserve: - segment_parts_for_gpt.append(node.string) - f.write(f'

#{show_html}#

') - else: - f.write(f'

{show_html}

') - node = node.next - if node is None: break - - for n in nodes: n.next = None # break - return_dict['nodes'] = nodes - return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt - return return_dict class LatexPaperFileGroup(): """ From 9aafb2ee479f067ac4b6a955a7e43a1d0c553f50 Mon Sep 17 00:00:00 2001 From: MengDanzz <95761983+MengDanzz@users.noreply.github.com> Date: Wed, 7 Jun 2023 09:18:57 +0800 Subject: [PATCH 05/78] =?UTF-8?q?=E9=9D=9Epypi=E5=8C=85=E5=8A=A0=E5=85=A5C?= =?UTF-8?q?OPY?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index aa4eee8..77f4188 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,7 @@ WORKDIR /gpt # 安装依赖 COPY requirements.txt ./ +COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl RUN pip3 install -r requirements.txt # 装载项目文件 COPY . . From dae65fd2c293cb4c4c8370ce962d5038a24378ce Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Wed, 7 Jun 2023 10:43:45 +0800 Subject: [PATCH 06/78] =?UTF-8?q?=E5=9C=A8copy=20..=E5=90=8E=E5=9C=A8?= =?UTF-8?q?=E8=BF=90=E8=A1=8C=E4=B8=80=E6=AC=A1pip=20install=E6=A3=80?= =?UTF-8?q?=E6=9F=A5=E4=BE=9D=E8=B5=96=E5=8F=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index 77f4188..97ad13d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -19,6 +19,7 @@ COPY ./docs/gradio-3.32.2-py3-none-any.whl ./docs/gradio-3.32.2-py3-none-any.whl RUN pip3 install -r requirements.txt # 装载项目文件 COPY . . 
+RUN pip3 install -r requirements.txt # 可选步骤,用于预热模块 RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' From 149db621ec812fd2341fe3060b80ee210a81e528 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Wed, 7 Jun 2023 11:09:12 +0800 Subject: [PATCH 07/78] langchain check depends --- crazy_functions/Langchain知识库.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py index 36999d5..5b09d3b 100644 --- a/crazy_functions/Langchain知识库.py +++ b/crazy_functions/Langchain知识库.py @@ -75,9 +75,18 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro @CatchException def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1): + # resolve deps + try: + from zh_langchain import construct_vector_store + from langchain.embeddings.huggingface import HuggingFaceEmbeddings + from .crazy_utils import knowledge_archive_interface + except Exception as e: + chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + from .crazy_utils import try_install_deps + try_install_deps(['zh_langchain==0.2.0']) # < ------------------- --------------- > - from .crazy_utils import knowledge_archive_interface kai = knowledge_archive_interface() if 'langchain_plugin_embedding' in chatbot._cookies: From 77cc141a8227ee78a936b57de970cd74b89495e3 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:14:02 +0800 Subject: [PATCH 08/78] Update README.md --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 02f047d..c671477 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | 
[函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时 +Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 @@ -285,11 +286,18 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
+
+ +
+10. Latex/Arxiv论文翻译功能 +
+ +
## 版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) -- version 3.4(Todo): 完善chatglm本地大模型的多线支持 +- version 3.4: +arxiv论文翻译、latex论文批改功能 - version 3.3: +互联网信息综合功能 - version 3.2: 函数插件支持更多参数接口 (保存对话功能, 解读任意语言代码+同时询问任意的LLM组合) - version 3.1: 支持同时问询多个gpt模型!支持api2d,支持多个apikey负载均衡 From e2de1d76ea9c2747b6ed0c5a90abc8863893bf20 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:18:31 +0800 Subject: [PATCH 09/78] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c671477..d72a29f 100644 --- a/README.md +++ b/README.md @@ -292,7 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- + ===> +
## 版本: From 10b3001dba7cde9ac6b8934eac287380f5ba16cf Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:19:11 +0800 Subject: [PATCH 10/78] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d72a29f..8ada026 100644 --- a/README.md +++ b/README.md @@ -292,8 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- ===> - + ===> +
## 版本: From ce6f11d2003864edc0fb22051403791c0cbba5b3 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:20:49 +0800 Subject: [PATCH 11/78] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8ada026..40125ac 100644 --- a/README.md +++ b/README.md @@ -292,8 +292,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex/Arxiv论文翻译功能
- ===> - + ===> +
## 版本: From a0ea5d0e9e7abd951e92162f457722e816848b62 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:22:03 +0800 Subject: [PATCH 12/78] Update README.md --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 40125ac..289cf06 100644 --- a/README.md +++ b/README.md @@ -284,15 +284,13 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex全文校对纠错
- -
-
- + ===> +
10. Latex/Arxiv论文翻译功能
- ===> +
From f9226d92be881faa2bea7e42f75c467b6ea2f7dd Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:24:14 +0800 Subject: [PATCH 13/78] Update version --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index ad75b2c..669c708 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.37, + "version": 3.4, "show_feature": true, - "new_feature": "修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持 <-> 提供复旦MOSS模型适配(启用需额外依赖) <-> 提供docker-compose方案兼容LLAMA盘古RWKV等模型的后端 <-> 新增Live2D装饰 <-> 完善对话历史的保存/载入/删除 <-> 保存对话功能" + "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" } From ff5403eac6e615c74a991dfecd93f9a5a12036d4 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:42:24 +0800 Subject: [PATCH 14/78] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 289cf06..d121116 100644 --- a/README.md +++ b/README.md @@ -284,8 +284,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 10. Latex全文校对纠错
- ===> - + ===> +
10. Latex/Arxiv论文翻译功能 From f30c9c6d3bf34f63b82e59c031220124c0e1c35d Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:43:13 +0800 Subject: [PATCH 15/78] Update README.md --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index d121116..d4526c9 100644 --- a/README.md +++ b/README.md @@ -233,27 +233,31 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h +2. Latex/Arxiv论文翻译功能 +
+ + +
- -2. 生成报告。大部分插件都会在执行结束后,生成工作报告 +3. 生成报告。大部分插件都会在执行结束后,生成工作报告
-3. 模块化功能设计,简单的接口却能支持强大的功能 +4. 模块化功能设计,简单的接口却能支持强大的功能
-4. 这是一个能够“自我译解”的开源项目 +5. 这是一个能够“自我译解”的开源项目
-5. 译解其他开源项目,不在话下 +6. 译解其他开源项目,不在话下
@@ -262,37 +266,33 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) +7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
-7. 新增MOSS大语言模型支持 +8. 新增MOSS大语言模型支持
-8. OpenAI图像生成 +9. OpenAI图像生成
-9. OpenAI音频解析与总结 +10. OpenAI音频解析与总结
-10. Latex全文校对纠错 +11. Latex全文校对纠错
===>
-10. Latex/Arxiv论文翻译功能 -
- - -
+ ## 版本: - version 3.5(Todo): 使用自然语言调用本项目的所有函数插件(高优先级) From b52695845e181399fb0b5607b26125a8070dd1e6 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:44:05 +0800 Subject: [PATCH 16/78] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4526c9..2045942 100644 --- a/README.md +++ b/README.md @@ -235,7 +235,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 2. Latex/Arxiv论文翻译功能
- + ===>
From 110510997f9018d206c05384ccb7526ae9b96db2 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Thu, 8 Jun 2023 12:48:52 +0800 Subject: [PATCH 17/78] Update README.md --- README.md | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 2045942..d4d6858 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/) 互联网信息聚合+GPT | [函数插件] 一键[让GPT先从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck),再回答问题,让信息永不过时 -Arxiv论文精密翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具 +⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 @@ -233,7 +233,7 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h -2. Latex/Arxiv论文翻译功能 +2. ⭐Latex/Arxiv论文翻译功能⭐
===> @@ -241,9 +241,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 3. 生成报告。大部分插件都会在执行结束后,生成工作报告
- - - + +
4. 模块化功能设计,简单的接口却能支持强大的功能 @@ -259,11 +258,8 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h 6. 译解其他开源项目,不在话下
- -
- -
- + +
7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) From e48d92e82e9634b5194947567bf7512a346d3343 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 8 Jun 2023 18:34:06 +0800 Subject: [PATCH 18/78] update translation --- docs/translate_english.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/translate_english.json b/docs/translate_english.json index d9968c6..57e008b 100644 --- a/docs/translate_english.json +++ b/docs/translate_english.json @@ -58,6 +58,8 @@ "连接网络回答问题": "ConnectToNetworkToAnswerQuestions", "联网的ChatGPT": "ChatGPTConnectedToNetwork", "解析任意code项目": "ParseAnyCodeProject", + "读取知识库作答": "ReadKnowledgeArchiveAnswerQuestions", + "知识库问答": "UpdateKnowledgeArchive", "同时问询_指定模型": "InquireSimultaneously_SpecifiedModel", "图片生成": "ImageGeneration", "test_解析ipynb文件": "Test_ParseIpynbFile", From ef1bfdd60f6b7c23bb23406cd8e0603f51f81165 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 8 Jun 2023 21:29:10 +0800 Subject: [PATCH 19/78] update pip install notice --- crazy_functions/数学动画生成manim.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/数学动画生成manim.py b/crazy_functions/数学动画生成manim.py index 5851b9c..26e61b1 100644 --- a/crazy_functions/数学动画生成manim.py +++ b/crazy_functions/数学动画生成manim.py @@ -8,7 +8,7 @@ def inspect_dependency(chatbot, history): import manim return True except: - chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manimgl```"]) + chatbot.append(["导入依赖失败", "使用该模块需要额外依赖,安装方法:```pip install manim manimgl```"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return False From 3c00e7a143f4c619166d4821d9804ef8aa0c5848 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sat, 10 Jun 2023 21:45:38 +0800 Subject: [PATCH 20/78] file link in chatbot --- toolbox.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/toolbox.py b/toolbox.py index 18915d0..4b0e1dd 100644 --- a/toolbox.py +++ b/toolbox.py @@ -483,7 +483,9 @@ def 
on_report_generated(files, chatbot): if len(report_files) == 0: return None, chatbot # files.extend(report_files) - chatbot.append(['报告如何远程获取?', '报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。']) + file_links = '' + for f in report_files: file_links += f'
{f}' + chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) return report_files, chatbot def is_openai_api_key(key): From ce0d8b9dab677dfb8e46429a58eeec2ef965b0ab Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Sun, 11 Jun 2023 01:36:23 +0800 Subject: [PATCH 21/78] =?UTF-8?q?=E8=99=9A=E7=A9=BA=E7=BB=88=E7=AB=AF?= =?UTF-8?q?=E6=8F=92=E4=BB=B6=E9=9B=8F=E5=BD=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 16 ++++- crazy_functions/虚空终端.py | 131 ++++++++++++++++++++++++++++++++++++ toolbox.py | 16 +++-- 3 files changed, 158 insertions(+), 5 deletions(-) create mode 100644 crazy_functions/虚空终端.py diff --git a/crazy_functional.py b/crazy_functional.py index d8ca9ae..2f0fbaa 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -368,5 +368,19 @@ def get_crazy_functions(): }) except: print('Load function plugin failed') - ###################### 第n组插件 ########################### + + try: + from crazy_functions.虚空终端 import 终端 + function_plugins.update({ + "超级终端": { + "Color": "stop", + "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": "", + "Function": HotReload(终端) + } + }) + except: + print('Load function plugin failed') + return function_plugins diff --git a/crazy_functions/虚空终端.py b/crazy_functions/虚空终端.py new file mode 100644 index 0000000..fe71a46 --- /dev/null +++ b/crazy_functions/虚空终端.py @@ -0,0 +1,131 @@ +from toolbox import CatchException, update_ui, gen_time_str +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive +from .crazy_utils import input_clipping + + +prompt = """ +I have to achieve some functionalities by calling one of the functions below. +Your job is to find the correct funtion to use to satisfy my requirement, +and then write python code to call this function with correct parameters. + +These are functions you are allowed to choose from: +1. 
+ 功能描述: 总结音视频内容 + 调用函数: ConcludeAudioContent(txt, llm_kwargs) + 参数说明: + txt: 音频文件的路径 + llm_kwargs: 模型参数, 永远给定None +2. + 功能描述: 将每次对话记录写入Markdown格式的文件中 + 调用函数: WriteMarkdown() +3. + 功能描述: 将指定目录下的PDF文件从英文翻译成中文 + 调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs) + 参数说明: + txt: PDF文件所在的路径 + llm_kwargs: 模型参数, 永远给定None +4. + 功能描述: 根据文本使用GPT模型生成相应的图像 + 调用函数: ImageGeneration(txt, llm_kwargs) + 参数说明: + txt: 图像生成所用到的提示文本 + llm_kwargs: 模型参数, 永远给定None +5. + 功能描述: 对输入的word文档进行摘要生成 + 调用函数: SummarizingWordDocuments(input_path, output_path) + 参数说明: + input_path: 待处理的word文档路径 + output_path: 摘要生成后的文档路径 + + +You should always anwser with following format: +---------------- +Code: +``` +class AutoAcademic(object): + def __init__(self): + self.selected_function = "FILL_CORRECT_FUNCTION_HERE" # e.g., "GenerateImage" + self.txt = "FILL_MAIN_PARAMETER_HERE" # e.g., "荷叶上的蜻蜓" + self.llm_kwargs = None +``` +Explanation: +只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。 +---------------- + +Now, this is my requirement: + +""" +def get_fn_lib(): + return { + "BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程", "批量翻译PDF文档"), + "SummarizingWordDocuments": ("crazy_functions.总结word文档", "总结word文档"), + "ImageGeneration": ("crazy_functions.图片生成", "图片生成"), + "TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译", "Markdown中译英"), + "SummaryAudioVideo": ("crazy_functions.总结音视频", "总结音视频"), + } + +def inspect_dependency(chatbot, history): + return True + +def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + import subprocess, sys, os, shutil, importlib + + with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f: + f.write(code) + + try: + AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic') + # importlib.reload(AutoAcademic) + auto_dict = AutoAcademic() + selected_function = auto_dict.selected_function + txt = auto_dict.txt + fp, fn = 
get_fn_lib()[selected_function] + fn_plugin = getattr(importlib.import_module(fp, fn), fn) + yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) + except: + from toolbox import trimmed_format_exc + chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + +def get_code_block(reply): + import re + pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks + matches = re.findall(pattern, reply) # find all code blocks in text + if len(matches) != 1: + raise RuntimeError("GPT is not generating proper code.") + return matches[0].strip('python') # code block + +@CatchException +def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本, 例如需要翻译的一段话, 再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行 + plugin_kwargs 插件模型的参数, 暂时没有用武之地 + chatbot 聊天显示框的句柄, 用于显示给用户 + history 聊天历史, 前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + # 清空历史, 以免输入溢出 + history = [] + + # 基本信息:功能、贡献者 + chatbot.append(["函数插件功能?", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + + # # 尝试导入依赖, 如果缺少依赖, 则给出安装建议 + # dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面 + # if not dep_ok: return + + # 输入 + i_say = prompt + txt + # 开始 + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=i_say, inputs_show_user=txt, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], + sys_prompt="" + ) + + # 将代码转为动画 + code = get_code_block(gpt_say) + yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port) diff --git a/toolbox.py b/toolbox.py index 4b0e1dd..4ab1116 100644 --- a/toolbox.py +++ b/toolbox.py @@ -221,16 +221,21 @@ def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ + pre = '
' + suf = '
' + if text.startswith(pre) and text.endswith(suf): + return text + if '```' in text: # careful input - return text + return pre + text + suf else: # wtf input lines = text.split("\n") for i, line in enumerate(lines): lines[i] = lines[i].replace(" ", " ") text = "
".join(lines) - return text + return pre + text + suf @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 def markdown_convertion(txt): @@ -342,8 +347,11 @@ def format_io(self, y): if y is None or y == []: return [] i_ask, gpt_reply = y[-1] - i_ask = text_divide_paragraph(i_ask) # 输入部分太自由,预处理一波 - gpt_reply = close_up_code_segment_during_stream(gpt_reply) # 当代码输出半截的时候,试着补上后个``` + # 输入部分太自由,预处理一波 + if i_ask is not None: i_ask = text_divide_paragraph(i_ask) + # 当代码输出半截的时候,试着补上后个``` + if gpt_reply is not None: gpt_reply = close_up_code_segment_during_stream(gpt_reply) + # process y[-1] = ( None if i_ask is None else markdown.markdown(i_ask, extensions=['fenced_code', 'tables']), None if gpt_reply is None else markdown_convertion(gpt_reply) From aeddf6b461d58eb7c755b1ed1d8ce2810cdf752f Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 11 Jun 2023 10:20:49 +0800 Subject: [PATCH 22/78] =?UTF-8?q?Update=20Latex=E8=BE=93=E5=87=BAPDF?= =?UTF-8?q?=E7=BB=93=E6=9E=9C.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 855cc1c..6592c9a 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -146,7 +146,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo from .latex_utils import Latex精细分解与转化, 编译Latex except Exception as e: chatbot.append([ f"解析项目: {txt}", - f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return @@ -216,7 +216,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, from .latex_utils import Latex精细分解与转化, 编译Latex 
except Exception as e: chatbot.append([ f"解析项目: {txt}", - f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) + f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 return From 3ecf2977a86abaf49ddaf112a196bc7f8fcb6717 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 11 Jun 2023 18:23:54 +0800 Subject: [PATCH 23/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dcaption=E7=BF=BB?= =?UTF-8?q?=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 59 +++++++++++++++++++++++++++------- 1 file changed, 47 insertions(+), 12 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index d3d7b9c..afaae22 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -29,7 +29,15 @@ def split_worker_reverse_caption(text, mask, pattern, flags=0): """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): - mask[res.regs[1][0]:res.regs[1][1]] = TRANSFORM + brace_level = 0 + p = begin = end = res.regs[1][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p + mask[begin:end] = TRANSFORM return text, mask def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): @@ -97,6 +105,7 @@ def 寻找Latex主文件(file_manifest, mode): else: continue raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): @@ -108,6 +117,7 @@ def rm_comments(main_file): main_file = '\n'.join(new_file_remove_comment_lines) main_file = re.sub(r'(? 0 and node_string.count('\_') > final_tex.count('\_'): # walk and replace any _ without \ final_tex = re.sub(r"(? 
lps = LatexPaperSplit() - res = lps.split(merged_content, project_folder) # 消耗时间的函数 + res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 # <-------- 拆分过长的latex片段 ----------> pfg = LatexPaperFileGroup() From 790a1cf12a2a98811ccb4c38568f21b120049f7a Mon Sep 17 00:00:00 2001 From: binary-husky Date: Sun, 11 Jun 2023 20:12:25 +0800 Subject: [PATCH 24/78] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index afaae22..89ca7a5 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -23,9 +23,26 @@ def split_worker(text, mask, pattern, flags=0): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask -def split_worker_reverse_caption(text, mask, pattern, flags=0): +def split_worker_careful_brace(text, mask, pattern, flags=0): """ - Move caption area out of preserve area + Move area into preserve area + """ + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + brace_level = -1 + p = begin = end = res.regs[0][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p+1 + mask[begin:end] = PRESERVE + return text, mask + +def split_worker_reverse_careful_brace(text, mask, pattern, flags=0): + """ + Move area out of preserve area """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): @@ -274,7 +291,8 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") text, mask = split_worker(text, mask, 
r"\\end\{(.*?)\}") - text, mask = split_worker_reverse_caption(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) + text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL) + text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) root = convert_to_linklist(text, mask) # 修复括号 @@ -504,6 +522,8 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin f.write(merged_content) # <-------- 精细切分latex文件 ----------> + chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。')) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 lps = LatexPaperSplit() res = lps.split(merged_content, project_folder, opts) # 消耗时间的函数 @@ -602,7 +622,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f current_dir = os.getcwd() n_fix = 1 max_try = 32 - chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) + chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history) chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面 yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面 From 9fd212652ed0e80d3e55b9b72461fc24d3837ce1 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Mon, 12 Jun 2023 09:45:59 +0800 Subject: [PATCH 25/78] =?UTF-8?q?=E4=B8=93=E4=B8=9A=E8=AF=8D=E6=B1=87?= =?UTF-8?q?=E5=A3=B0=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 6592c9a..2e9a30b 100644 --- 
a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -5,7 +5,7 @@ pj = os.path.join ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") # =================================== 工具函数 =============================================== -沙雕GPT啊别犯这些低级翻译错误 = 'You must to translate "agent" to "智能体". ' +专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' def switch_prompt(pfg, mode): """ Generate prompts and system prompts based on the mode for proofreading or translating. @@ -25,7 +25,7 @@ def switch_prompt(pfg, mode): f"\n\n{frag}" for frag in pfg.sp_file_contents] sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] elif mode == 'translate_zh': - inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese." + 沙雕GPT啊别犯这些低级翻译错误 + + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. 
" + r"Answer me only with the translated text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] From 9ac3d0d65dfd1ae4209a28139d133057a3c22e39 Mon Sep 17 00:00:00 2001 From: OverKit <78402478+OverKit@users.noreply.github.com> Date: Mon, 12 Jun 2023 10:09:52 +0800 Subject: [PATCH 26/78] check letter % after removing spaces or tabs in the left --- crazy_functions/latex_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 89ca7a5..53894ca 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -127,7 +127,7 @@ def rm_comments(main_file): new_file_remove_comment_lines = [] for l in main_file.splitlines(): # 删除整行的空注释 - if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")): + if l.lstrip().startswith("%"): pass else: new_file_remove_comment_lines.append(l) From c365ea9f579acb88ffd756c3483c5c29fc2b57c3 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Tue, 13 Jun 2023 16:13:19 +0800 Subject: [PATCH 27/78] Update README.md --- README.md | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index d4d6858..581d3d5 100644 --- a/README.md +++ b/README.md @@ -228,7 +228,7 @@ docker-compose up 1. 对话保存功能。在函数插件区调用 `保存当前的对话` 即可将当前对话保存为可读+可复原的html文件, 另外在函数插件区(下拉菜单)调用 `载入对话历史存档` ,即可还原之前的会话。 -Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存,点击 `删除所有本地对话历史记录` 可以删除所有html存档缓存。 +Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史html存档缓存。
@@ -251,38 +251,33 @@ Tip:不指定文件直接点击 `载入对话历史存档` 可以查看历史h
-5. 这是一个能够“自我译解”的开源项目 -
- -
- -6. 译解其他开源项目,不在话下 +5. 译解其他开源项目
-7. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`) +6. 装饰[live2d](https://github.com/fghrsh/live2d_demo)的小功能(默认关闭,需要修改`config.py`)
-8. 新增MOSS大语言模型支持 +7. 新增MOSS大语言模型支持
-9. OpenAI图像生成 +8. OpenAI图像生成
-10. OpenAI音频解析与总结 +9. OpenAI音频解析与总结
-11. Latex全文校对纠错 +10. Latex全文校对纠错
===> @@ -310,30 +305,32 @@ gpt_academic开发者QQ群-2:610599535 - 已知问题 - 某些浏览器翻译插件干扰此软件前端的运行 - - 官方Gradio目前有很多兼容性Bug,请务必使用requirement.txt安装Gradio + - 官方Gradio目前有很多兼容性Bug,请务必使用`requirement.txt`安装Gradio ## 参考与学习 ``` -代码中参考了很多其他优秀项目中的设计,主要包括: +代码中参考了很多其他优秀项目中的设计,顺序不分先后: -# 项目1:清华ChatGLM-6B: +# 清华ChatGLM-6B: https://github.com/THUDM/ChatGLM-6B -# 项目2:清华JittorLLMs: +# 清华JittorLLMs: https://github.com/Jittor/JittorLLMs -# 项目3:Edge-GPT: -https://github.com/acheong08/EdgeGPT - -# 项目4:ChuanhuChatGPT: -https://github.com/GaiZhenbiao/ChuanhuChatGPT - -# 项目5:ChatPaper: +# ChatPaper: https://github.com/kaixindelele/ChatPaper -# 更多: +# Edge-GPT: +https://github.com/acheong08/EdgeGPT + +# ChuanhuChatGPT: +https://github.com/GaiZhenbiao/ChuanhuChatGPT + +# Oobabooga one-click installer: +https://github.com/oobabooga/one-click-installers + +# More: https://github.com/gradio-app/gradio https://github.com/fghrsh/live2d_demo -https://github.com/oobabooga/one-click-installers ``` From c40ebfc21f39b995b3f0437b387d5ee561759327 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Wed, 14 Jun 2023 09:50:15 +0800 Subject: [PATCH 28/78] =?UTF-8?q?=E5=B0=86gpt-3.5-16k=E4=BD=9C=E4=B8=BA?= =?UTF-8?q?=E5=8A=A0=E5=85=A5=E6=94=AF=E6=8C=81=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 2 +- request_llm/bridge_all.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 14b089e..87e0ec9 100644 --- a/config.py +++ b/config.py @@ -46,7 +46,7 @@ MAX_RETRY = 2 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] 
# P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # 本地LLM模型如ChatGLM的执行方式 CPU/GPU diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index b6efe21..a27407c 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -83,6 +83,15 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + + "gpt-3.5-turbo-16k": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024*16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, "gpt-4": { "fn_with_ui": chatgpt_ui, From 8c62f21aa6b0c68bdc795f315f5d325b1384161b Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Wed, 14 Jun 2023 09:57:09 +0800 Subject: [PATCH 29/78] =?UTF-8?q?3.41=E5=A2=9E=E5=8A=A0gpt-3.5-16k?= =?UTF-8?q?=E7=9A=84=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index 669c708..ceb909a 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.4, + "version": 3.41, "show_feature": true, - "new_feature": "新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" + "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" } From 73d4a1ff4b41548b6d6b5ea4c321fa2e81fe55ce Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Wed, 14 Jun 2023 10:15:47 +0800 Subject: [PATCH 30/78] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 581d3d5..39b37ea 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 公式/图片/表格显示 | 
可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 -[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持,[API2D](https://api2d.com/)接口支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? +[多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? 更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 …… From ef752838b06a4898ba23ea34dd349be6a51a199e Mon Sep 17 00:00:00 2001 From: Skyzayre <120616113+Skyzayre@users.noreply.github.com> Date: Thu, 15 Jun 2023 02:07:43 +0800 Subject: [PATCH 31/78] Update README.md --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 39b37ea..c3dd52a 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To translate this project to arbitary language with GPT, read and run [`multi_la > > 1.请注意只有**红颜色**标识的函数插件(按钮)才支持读取文件,部分插件位于插件区的**下拉菜单**中。另外我们以**最高优先级**欢迎和处理任何新插件的PR! 
> -> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 +> 2.本项目中每个文件的功能都在自译解[`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)详细说明。随着版本的迭代,您也可以随时自行点击相关函数插件,调用GPT重新生成项目的自我解析报告。常见问题汇总在[`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98)当中。[安装方法](#installation)。 > > 3.本项目兼容并鼓励尝试国产大语言模型chatglm和RWKV, 盘古等等。支持多个api-key共存,可在配置文件中填写如`API_KEY="openai-key1,openai-key2,api2d-key3"`。需要临时更换`API_KEY`时,在输入区输入临时的`API_KEY`然后回车键提交后即可生效。 @@ -31,13 +31,13 @@ To translate this project to arbitary language with GPT, read and run [`multi_la 一键中英互译 | 一键中英互译 一键代码解释 | 显示代码、解释代码、生成代码、给代码加注释 [自定义快捷键](https://www.bilibili.com/video/BV14s4y1E7jN) | 支持自定义快捷键 -模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/chatgpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) -[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/chatgpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 +模块化设计 | 支持自定义强大的[函数插件](https://github.com/binary-husky/gpt_academic/tree/master/crazy_functions),插件支持[热更新](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97) +[自我程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | [函数插件] [一键读懂](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A)本项目的源代码 [程序剖析](https://www.bilibili.com/video/BV1cj411A7VW) | 
[函数插件] 一键可以剖析其他Python/C/C++/Java/Lua/...项目树 读论文、[翻译](https://www.bilibili.com/video/BV1KT411x7Wn)论文 | [函数插件] 一键解读latex/pdf论文全文并生成摘要 Latex全文[翻译](https://www.bilibili.com/video/BV1nk4y1Y7Js/)、[润色](https://www.bilibili.com/video/BV1FT411H7c5/) | [函数插件] 一键翻译或润色latex论文 批量注释生成 | [函数插件] 一键批量生成函数注释 -Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/chatgpt_academic/blob/master/docs/README_EN.md)了吗? +Markdown[中英互译](https://www.bilibili.com/video/BV1yo4y157jV/) | [函数插件] 看到上面5种语言的[README](https://github.com/binary-husky/gpt_academic/blob/master/docs/README_EN.md)了吗? chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 [PDF论文全文翻译功能](https://www.bilibili.com/video/BV1KT411x7Wn) | [函数插件] PDF论文提取题目&摘要+翻译全文(多线程) [Arxiv小助手](https://www.bilibili.com/video/BV1LM4y1279X) | [函数插件] 输入arxiv文章url即可一键翻译摘要+下载PDF @@ -46,7 +46,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 ⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),迄今为止最好的论文翻译工具⭐ 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序 -启动暗色gradio[主题](https://github.com/binary-husky/chatgpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 +启动暗色gradio[主题](https://github.com/binary-husky/gpt_academic/issues/173) | 在浏览器url后面添加```/?__theme=dark```可以切换dark主题 [多LLM模型](https://www.bilibili.com/video/BV1wT411p7yf)支持 | 同时被GPT3.5、GPT4、[清华ChatGLM](https://github.com/THUDM/ChatGLM-6B)、[复旦MOSS](https://github.com/OpenLMLab/MOSS)同时伺候的感觉一定会很不错吧? 
更多LLM模型接入,支持[huggingface部署](https://huggingface.co/spaces/qingxu98/gpt-academic) | 加入Newbing接口(新必应),引入清华[Jittorllms](https://github.com/Jittor/JittorLLMs)支持[LLaMA](https://github.com/facebookresearch/llama),[RWKV](https://github.com/BlinkDL/ChatRWKV)和[盘古α](https://openi.org.cn/pangu/) 更多新功能展示(图像生成等) …… | 见本文档结尾处 …… @@ -91,8 +91,8 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 1. 下载项目 ```sh -git clone https://github.com/binary-husky/chatgpt_academic.git -cd chatgpt_academic +git clone https://github.com/binary-husky/.git +cd gpt_academic ``` 2. 配置API_KEY @@ -150,8 +150,8 @@ python main.py 1. 仅ChatGPT(推荐大多数人选择) ``` sh -git clone https://github.com/binary-husky/chatgpt_academic.git # 下载项目 -cd chatgpt_academic # 进入路径 +git clone https://github.com/binary-husky/gpt_academic.git # 下载项目 +cd gpt_academic # 进入路径 nano config.py # 用任意文本编辑器编辑config.py, 配置 “Proxy”, “API_KEY” 以及 “WEB_PORT” (例如50923) 等 docker build -t gpt-academic . # 安装 @@ -188,10 +188,10 @@ docker-compose up 按照`config.py`中的说明配置API_URL_REDIRECT即可。 4. 远程云服务器部署(需要云服务器知识与经验)。 -请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) +请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) 5. 使用WSL2(Windows Subsystem for Linux 子系统)。 -请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) +请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) 6. 
如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) @@ -220,7 +220,7 @@ docker-compose up 编写强大的函数插件来执行任何你想得到的和想不到的任务。 本项目的插件编写、调试难度很低,只要您具备一定的python基础知识,就可以仿照我们提供的模板实现自己的插件功能。 -详情请参考[函数插件指南](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 +详情请参考[函数插件指南](https://github.com/binary-husky/gpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97)。 --- # Latest Update From 6d849eeb121b9f88821d7e46cad95c32bb9a12a9 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 16 Jun 2023 17:33:03 +0800 Subject: [PATCH 32/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLangchain=E6=8F=92?= =?UTF-8?q?=E4=BB=B6=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Langchain知识库.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/Langchain知识库.py b/crazy_functions/Langchain知识库.py index 5b09d3b..31c459a 100644 --- a/crazy_functions/Langchain知识库.py +++ b/crazy_functions/Langchain知识库.py @@ -30,7 +30,7 @@ def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_pro ) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.0']) + try_install_deps(['zh_langchain==0.2.1']) # < --------------------读取参数--------------- > if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") @@ -84,7 +84,7 @@ def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst chatbot.append(["依赖不足", "导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 from .crazy_utils import try_install_deps - try_install_deps(['zh_langchain==0.2.0']) + try_install_deps(['zh_langchain==0.2.1']) # < ------------------- --------------- > kai = knowledge_archive_interface() From bb864c631376320f8847b36e4a75a38edbb23176 Mon 
Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 16 Jun 2023 17:33:19 +0800 Subject: [PATCH 33/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=B8=80=E4=BA=9B?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=E6=96=87=E5=AD=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 53894ca..78eec29 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -165,17 +165,23 @@ def merge_tex_files(project_foler, main_file, mode): main_file = rm_comments(main_file) if mode == 'translate_zh': + # find paper documentclass pattern = re.compile(r'\\documentclass.*\n') match = pattern.search(main_file) + assert match is not None, "Cannot find documentclass statement!" position = match.end() add_ctex = '\\usepackage{ctex}\n' add_url = '\\usepackage{url}\n' if '{url}' not in main_file else '' main_file = main_file[:position] + add_ctex + add_url + main_file[position:] - # 2 fontset=windows + # fontset=windows import platform if platform.system() != 'Windows': main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file) main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file) + # find paper abstract + pattern = re.compile(r'\\begin\{abstract\}.*\n') + match = pattern.search(main_file) + assert match is not None, "Cannot find paper abstract section!" return main_file @@ -418,6 +424,7 @@ class LatexPaperSplit(): if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) + assert match is not None, "Cannot find paper abstract section!" 
position = match.end() result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string From 9a5a509dd9c85949a89a7ef763572dca92afeb46 Mon Sep 17 00:00:00 2001 From: OverKit Date: Sat, 17 Jun 2023 19:27:21 +0800 Subject: [PATCH 34/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=85=B3=E4=BA=8Eabstr?= =?UTF-8?q?act=E7=9A=84=E6=90=9C=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 78eec29..3734f00 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -424,7 +424,9 @@ class LatexPaperSplit(): if mode == 'translate_zh': pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(result_string) - assert match is not None, "Cannot find paper abstract section!" + if not match: + pattern = re.compile(r'\\abstract\{') + match = pattern.search(result_string) position = match.end() result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:] return result_string From 2bb13b4677b3bd403b950c97036a2753cda5ec8b Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 18 Jun 2023 15:44:42 +0800 Subject: [PATCH 35/78] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c3dd52a..f079086 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,7 @@ docker run --rm -it --net=host gpt-academic #(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` +P.S. 如果需要Latex功能,请见另一个[Dockerfile](https://github.com/binary-husky/gpt_academic/blob/master/docs/Dockerfile%2BNoLocal%2BLatex) 2. 
ChatGPT + ChatGLM + MOSS(需要熟悉Docker) From 7fdf0a8e51ee7acfcb2822d07a6c3ed1e8c52846 Mon Sep 17 00:00:00 2001 From: OverKit Date: Sun, 18 Jun 2023 15:51:29 +0800 Subject: [PATCH 36/78] =?UTF-8?q?=E8=B0=83=E6=95=B4=E5=8C=BA=E5=88=86?= =?UTF-8?q?=E5=86=85=E5=AE=B9=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 81 +++++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 3734f00..eebce80 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -23,38 +23,67 @@ def split_worker(text, mask, pattern, flags=0): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask -def split_worker_careful_brace(text, mask, pattern, flags=0): +def set_transform_area(text, mask, pattern, flags=0): """ - Move area into preserve area + Add a transform text area in this paper """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): - brace_level = -1 - p = begin = end = res.regs[0][0] - for _ in range(1024*16): - if text[p] == '}' and brace_level == 0: break - elif text[p] == '}': brace_level -= 1 - elif text[p] == '{': brace_level += 1 - p += 1 - end = p+1 - mask[begin:end] = PRESERVE + mask[res.span()[0] : res.span()[1]] = TRANSFORM return text, mask + +def split_worker_careful_brace(text, mask, pattern, flags=0): + """ + Move area into preserve area. + It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})". 
+ """ + pattern_compile = re.compile(pattern, flags) + res = pattern_compile.search(text) + + # 确保捕获组存在 + if res and len(res.regs) > 1: + brace_level = 0 + p = begin = end = res.regs[1][0] + for _ in range(1024 * 16): + if text[p] == "}" and brace_level == 1: + break + elif text[p] == "}": + brace_level -= 1 + elif text[p] == "{": + brace_level += 1 + p += 1 + end = p + mask[begin + 1 : end] = PRESERVE + split_worker_careful_brace(text[end:], mask[end:], pattern, flags=flags) + + return text, mask + + def split_worker_reverse_careful_brace(text, mask, pattern, flags=0): """ - Move area out of preserve area + Move area out of preserve area. + It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})". """ pattern_compile = re.compile(pattern, flags) - for res in pattern_compile.finditer(text): + res = pattern_compile.search(text) + + # 确保捕获组存在 + if res and len(res.regs) > 1: brace_level = 0 p = begin = end = res.regs[1][0] - for _ in range(1024*16): - if text[p] == '}' and brace_level == 0: break - elif text[p] == '}': brace_level -= 1 - elif text[p] == '{': brace_level += 1 + for _ in range(1024 * 16): + if text[p] == "}" and brace_level == 1: + break + elif text[p] == "}": + brace_level -= 1 + elif text[p] == "{": + brace_level += 1 p += 1 end = p - mask[begin:end] = TRANSFORM + mask[begin + 1 : end] = TRANSFORM + split_worker_reverse_careful_brace(text[end:], mask[end:], pattern, flags=flags) + return text, mask def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): @@ -260,13 +289,14 @@ def split_subprocess(txt, project_folder, return_dict, opts): mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM # 吸收title与作者以上的部分 - text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL) + text, mask = split_worker(text, mask, r".*?\\begin\{document\}", re.DOTALL) # 删除iffalse注释 text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) # 吸收在25行以内的begin-end组合 text, mask = split_worker_begin_end(text, 
mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) # 吸收匿名公式 text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) + text, mask = split_worker(text, mask, r"\\\[.*?\\\]", re.DOTALL) # 吸收其他杂项 text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") @@ -274,6 +304,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") + text, mask = split_worker(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) @@ -293,12 +324,18 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\item ") text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") + + text, mask = set_transform_area(text, mask, r"\\begin\{abstract\}.*?\\end\{abstract\}", re.DOTALL) + + text, mask = split_worker_careful_brace(text, mask, r"\\hl(\{.*\})", re.DOTALL) + text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption(\{.*\})", re.DOTALL) + text, mask = split_worker_reverse_careful_brace(text, mask, r"\\abstract(\{.*\})", re.DOTALL) + + text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") text, mask = split_worker(text, mask, 
r"\\end\{(.*?)\}") - text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL) - text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) + root = convert_to_linklist(text, mask) # 修复括号 From 4bafbb3562f249b9b10a3595ac9f859762a52377 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 18 Jun 2023 15:54:23 +0800 Subject: [PATCH 37/78] =?UTF-8?q?Update=20Latex=E8=BE=93=E5=87=BAPDF?= =?UTF-8?q?=E7=BB=93=E6=9E=9C.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/Latex输出PDF结果.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 2e9a30b..6c89751 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -205,7 +205,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- information about this plugin -------------> chatbot.append([ "函数插件功能?", - "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) + "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 From 8d7ee975a012a9b258408ddcf9a59ea4a29a752d Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 18 Jun 2023 16:10:45 +0800 Subject: [PATCH 38/78] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f079086..182a49b 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,8 @@ conda activate gptac_venv # 激活anaconda环境 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 ``` +P.S. 如果需要依赖Latex的插件功能,请见Wiki +
如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处

@@ -160,7 +162,7 @@ docker run --rm -it --net=host gpt-academic #(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` -P.S. 如果需要Latex功能,请见另一个[Dockerfile](https://github.com/binary-husky/gpt_academic/blob/master/docs/Dockerfile%2BNoLocal%2BLatex) +P.S. 如果需要依赖Latex的插件功能,请见Wiki 2. ChatGPT + ChatGLM + MOSS(需要熟悉Docker) From 29c6bfb6cb08f58a0e5fba8540ef56cf36277cf6 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 18 Jun 2023 16:12:06 +0800 Subject: [PATCH 39/78] Update README.md --- README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 182a49b..6e461c2 100644 --- a/README.md +++ b/README.md @@ -113,11 +113,16 @@ conda activate gptac_venv # 激活anaconda环境 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 ``` -P.S. 如果需要依赖Latex的插件功能,请见Wiki -

如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处

+ +

如果需要依赖Latex的插件功能(如Arxiv文献翻译),请点击展开此处 +

+ 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89) +

+
+ 【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强): ```sh # 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) From a06e43c96b9f6c199b0d440d5db2e7247224a18b Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Sun, 18 Jun 2023 16:15:37 +0800 Subject: [PATCH 40/78] Update README.md --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index 6e461c2..c69bfb5 100644 --- a/README.md +++ b/README.md @@ -113,16 +113,10 @@ conda activate gptac_venv # 激活anaconda环境 python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤 ``` +
如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处

- -

如果需要依赖Latex的插件功能(如Arxiv文献翻译),请点击展开此处 -

- 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89) -

-
- 【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强): ```sh # 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) From d5bab093f94523665c5b0a6b7781dd491123faff Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Mon, 19 Jun 2023 15:17:33 +1000 Subject: [PATCH 41/78] rename function names --- crazy_functions/latex_utils.py | 163 ++++++++++++++------------------- 1 file changed, 69 insertions(+), 94 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index eebce80..a984b2f 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -8,85 +8,65 @@ pj = os.path.join """ ======================================================================== Part One -Latex segmentation to a linklist +Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1) ======================================================================== """ PRESERVE = 0 TRANSFORM = 1 -def split_worker(text, mask, pattern, flags=0): +def set_forbidden_text(text, mask, pattern, flags=0): """ Add a preserve text area in this paper + e.g. 
with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}" + you can mask out (mask = PRESERVE so that text become untouchable for GPT) + everything between "\begin{equation}" and "\end{equation}" """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask -def set_transform_area(text, mask, pattern, flags=0): +def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): """ - Add a transform text area in this paper + Add a preserve text area in this paper (text become untouchable for GPT). + count the number of the braces so as to catch compelete text area. + e.g. + \caption{blablablablabla\texbf{blablabla}blablabla.} """ pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): - mask[res.span()[0] : res.span()[1]] = TRANSFORM + brace_level = -1 + p = begin = end = res.regs[0][0] + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 + p += 1 + end = p+1 + mask[begin:end] = PRESERVE return text, mask - -def split_worker_careful_brace(text, mask, pattern, flags=0): +def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0): """ - Move area into preserve area. - It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})". + Move area out of preserve area (make text editable for GPT) + count the number of the braces so as to catch compelete text area. + e.g. 
+ \caption{blablablablabla\texbf{blablabla}blablabla.} """ pattern_compile = re.compile(pattern, flags) - res = pattern_compile.search(text) - - # 确保捕获组存在 - if res and len(res.regs) > 1: + for res in pattern_compile.finditer(text): brace_level = 0 p = begin = end = res.regs[1][0] - for _ in range(1024 * 16): - if text[p] == "}" and brace_level == 1: - break - elif text[p] == "}": - brace_level -= 1 - elif text[p] == "{": - brace_level += 1 + for _ in range(1024*16): + if text[p] == '}' and brace_level == 0: break + elif text[p] == '}': brace_level -= 1 + elif text[p] == '{': brace_level += 1 p += 1 end = p - mask[begin + 1 : end] = PRESERVE - split_worker_careful_brace(text[end:], mask[end:], pattern, flags=flags) - + mask[begin:end] = TRANSFORM return text, mask - -def split_worker_reverse_careful_brace(text, mask, pattern, flags=0): - """ - Move area out of preserve area. - It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})". - """ - pattern_compile = re.compile(pattern, flags) - res = pattern_compile.search(text) - - # 确保捕获组存在 - if res and len(res.regs) > 1: - brace_level = 0 - p = begin = end = res.regs[1][0] - for _ in range(1024 * 16): - if text[p] == "}" and brace_level == 1: - break - elif text[p] == "}": - brace_level -= 1 - elif text[p] == "{": - brace_level += 1 - p += 1 - end = p - mask[begin + 1 : end] = TRANSFORM - split_worker_reverse_careful_brace(text[end:], mask[end:], pattern, flags=flags) - - return text, mask - -def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): +def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): """ Find all \begin{} ... \end{} text block that with less than limit_n_lines lines. 
Add it to preserve area @@ -289,53 +269,48 @@ def split_subprocess(txt, project_folder, return_dict, opts): mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM # 吸收title与作者以上的部分 - text, mask = split_worker(text, mask, r".*?\\begin\{document\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL) # 删除iffalse注释 - text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) # 吸收在25行以内的begin-end组合 - text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25) + text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42) # 吸收匿名公式 - text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) - text, mask = split_worker(text, mask, r"\\\[.*?\\\]", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\$\$(.*?)\$\$", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\\[.*?\\\]", re.DOTALL) # 吸收其他杂项 - text, mask = split_worker(text, mask, r"\\section\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) - text, 
mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) - text, mask = split_worker(text, mask, r"\\item ") - text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}") - - text, mask = set_transform_area(text, mask, r"\\begin\{abstract\}.*?\\end\{abstract\}", re.DOTALL) - - text, mask = split_worker_careful_brace(text, mask, r"\\hl(\{.*\})", re.DOTALL) - text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption(\{.*\})", re.DOTALL) - text, mask = split_worker_reverse_careful_brace(text, mask, r"\\abstract(\{.*\})", re.DOTALL) - - text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") - text, mask = split_worker(text, mask, r"\\end\{(.*?)\}") - + text, 
mask = set_forbidden_text(text, mask, r"\\section\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\section\*\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\subsection\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\subsubsection\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\bibliography\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\bibliographystyle\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", 
re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + text, mask = set_forbidden_text(text, mask, r"\\item ") + text, mask = set_forbidden_text(text, mask, r"\\label\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\begin\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\vspace\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\hspace\{(.*?)\}") + text, mask = set_forbidden_text(text, mask, r"\\end\{(.*?)\}") + text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL) + # reverse 操作必须放在最后 + text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL) root = convert_to_linklist(text, mask) # 修复括号 From af7734dd35c62de6f85a18b00c3598527b85cfb4 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Mon, 19 Jun 2023 16:57:11 +1000 Subject: [PATCH 42/78] avoid file fusion --- crazy_functions/latex_utils.py | 2 +- main.py | 4 ++-- toolbox.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 78eec29..163d0e2 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -685,7 +685,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') if os.path.exists(pj(work_folder, '..', 'translation')): shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) - promote_file_to_downloadzone(result_pdf) + promote_file_to_downloadzone(result_pdf, chatbot) return True # 成功啦 else: if n_fix>=max_try: break diff --git a/main.py b/main.py index 7dbf17f..65e1f4c 100644 --- a/main.py 
+++ b/main.py @@ -155,7 +155,7 @@ def main(): for k in crazy_fns: if not crazy_fns[k].get("AsButton", True): continue click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 函数插件-下拉菜单与随变按钮的互动 def on_dropdown_changed(k): @@ -175,7 +175,7 @@ def main(): if k in [r"打开插件列表", r"请先从插件列表中选择"]: return yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs) click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo) - click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot]) + click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot]) cancel_handles.append(click_handle) # 终止按钮的回调函数注册 stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) diff --git a/toolbox.py b/toolbox.py index 4ab1116..ac49afc 100644 --- a/toolbox.py +++ b/toolbox.py @@ -439,13 +439,15 @@ def find_recent_files(directory): return recent_files -def promote_file_to_downloadzone(file, rename_file=None): +def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): # 将文件复制一份到下载区 import shutil if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' new_path = os.path.join(f'./gpt_log/', rename_file) if os.path.exists(new_path): os.remove(new_path) shutil.copyfile(file, new_path) + if chatbot: + chatbot._cookies.update({'file_to_promote': [new_path]}) def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): """ @@ -485,16 +487,20 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): return chatbot, txt, txt2 -def on_report_generated(files, chatbot): +def on_report_generated(cookies, files, chatbot): from 
toolbox import find_recent_files - report_files = find_recent_files('gpt_log') + if 'file_to_promote' in cookies: + report_files = cookies['file_to_promote'] + cookies.pop('file_to_promote') + else: + report_files = find_recent_files('gpt_log') if len(report_files) == 0: return None, chatbot # files.extend(report_files) file_links = '' for f in report_files: file_links += f'
{f}' chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) - return report_files, chatbot + return cookies, report_files, chatbot def is_openai_api_key(key): API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key) From f3e4e26e2f095e1f0d3b5faeaec23fde2b0b33a0 Mon Sep 17 00:00:00 2001 From: dackdawn Date: Mon, 19 Jun 2023 21:40:26 +0800 Subject: [PATCH 43/78] =?UTF-8?q?=E6=B7=BB=E5=8A=A00613=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E7=9A=84=E5=A3=B0=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openai对gpt-3.5-turbo的RPM限制是3,而gpt-3.5-turbo-0613的RPM是60,虽然两个模型的内容是一致的,但是选定特定模型可以获得更高的RPM和TPM --- config.py | 2 +- request_llm/bridge_all.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/config.py b/config.py index 87e0ec9..917c268 100644 --- a/config.py +++ b/config.py @@ -46,7 +46,7 @@ MAX_RETRY = 2 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0613", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] # P.S. 
其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # 本地LLM模型如ChatGLM的执行方式 CPU/GPU diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index a27407c..22fa04b 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -93,6 +93,24 @@ model_info = { "token_cnt": get_token_num_gpt35, }, + "gpt-3.5-turbo-0613": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 4096, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + + "gpt-3.5-turbo-16k-0613": { + "fn_with_ui": chatgpt_ui, + "fn_without_ui": chatgpt_noui, + "endpoint": openai_endpoint, + "max_token": 1024 * 16, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + "gpt-4": { "fn_with_ui": chatgpt_ui, "fn_without_ui": chatgpt_noui, From 5da633d94dfa13c7658956537bc7c6c0d37e8a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lebenito=EF=BC=88=E7=94=9F=E7=B3=B8=EF=BC=89?= Date: Tue, 20 Jun 2023 19:10:11 +0800 Subject: [PATCH 44/78] Update README.md Fix the error URL for the git clone. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c69bfb5..7976076 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报 1. 
下载项目 ```sh -git clone https://github.com/binary-husky/.git +git clone https://github.com/binary-husky/gpt_academic.git cd gpt_academic ``` From 61eb0da861526ccee760caba86ffca387d9af358 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Tue, 20 Jun 2023 22:08:09 +1000 Subject: [PATCH 45/78] fix encoding bug --- crazy_functions/latex_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 163d0e2..308044f 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -175,9 +175,8 @@ def merge_tex_files(project_foler, main_file, mode): main_file = main_file[:position] + add_ctex + add_url + main_file[position:] # fontset=windows import platform - if platform.system() != 'Windows': - main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file) - main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file) + main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file) + main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file) # find paper abstract pattern = re.compile(r'\\begin\{abstract\}.*\n') match = pattern.search(main_file) From bf955aaf12e94674877ca61d02d197547ae05cee Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Tue, 20 Jun 2023 23:12:30 +1000 Subject: [PATCH 46/78] fix bugs --- crazy_functional.py | 45 +++++++++++++++++++---------- crazy_functions/Latex输出PDF结果.py | 23 ++++++++++----- crazy_functions/latex_utils.py | 24 ++++++++------- crazy_functions/对话历史存档.py | 7 ++--- toolbox.py | 4 +-- 5 files changed, 63 insertions(+), 40 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index 2f0fbaa..6ad2dc8 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -358,29 +358,42 @@ def get_crazy_functions(): }) from 
crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": { + "Arixv翻译(输入arxivID) [需Latex]": { "Color": "stop", "AsButton": False, - # "AdvancedArgs": True, - # "ArgsReminder": "", + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', + "Function": HotReload(Latex翻译中文并重新编译PDF) + } + }) + function_plugins.update({ + "本地论文翻译(上传Latex压缩包) [需Latex]": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', "Function": HotReload(Latex翻译中文并重新编译PDF) } }) except: print('Load function plugin failed') - try: - from crazy_functions.虚空终端 import 终端 - function_plugins.update({ - "超级终端": { - "Color": "stop", - "AsButton": False, - # "AdvancedArgs": True, - # "ArgsReminder": "", - "Function": HotReload(终端) - } - }) - except: - print('Load function plugin failed') + # try: + # from crazy_functions.虚空终端 import 终端 + # function_plugins.update({ + # "超级终端": { + # "Color": "stop", + # "AsButton": False, + # # "AdvancedArgs": True, + # # "ArgsReminder": "", + # "Function": HotReload(终端) + # } + # }) + # except: + # print('Load function plugin failed') return function_plugins diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 6c89751..214b00a 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -1,12 +1,13 @@ from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str +from functools import partial import glob, os, requests, time pj = os.path.join ARXIV_CACHE_DIR = 
os.path.expanduser(f"~/arxiv_cache/") # =================================== 工具函数 =============================================== 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' -def switch_prompt(pfg, mode): +def switch_prompt(pfg, mode, more_requirement): """ Generate prompts and system prompts based on the mode for proofreading or translating. Args: @@ -25,7 +26,7 @@ def switch_prompt(pfg, mode): f"\n\n{frag}" for frag in pfg.sp_file_contents] sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] elif mode == 'translate_zh': - inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 + + inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + r"Answer me only with the translated text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] @@ -79,7 +80,7 @@ def arxiv_download(chatbot, history, txt): os.makedirs(translation_dir) target_file = pj(translation_dir, 'translate_zh.pdf') if os.path.exists(target_file): - promote_file_to_downloadzone(target_file) + promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot) return target_file return False def is_float(s): @@ -88,8 +89,10 @@ def arxiv_download(chatbot, history, txt): return True except ValueError: return False - if ('.' in txt) and ('/' not in txt) and is_float(txt): + if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID txt = 'https://arxiv.org/abs/' + txt + if ('.' 
in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID + txt = 'https://arxiv.org/abs/' + txt[:10] if not txt.startswith('https://arxiv.org'): return txt, None @@ -177,7 +180,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> if not os.path.exists(project_folder + '/merge_proofread.tex'): - yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) # <-------------- compile PDF -------------> @@ -208,6 +212,10 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, "对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 + # <-------------- more requirements -------------> + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + more_req = plugin_kwargs.get("advanced_arg", "") + _switch_prompt_ = partial(switch_prompt, more_requirement=more_req) # <-------------- check deps -------------> try: @@ -255,11 +263,12 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> if not os.path.exists(project_folder + '/merge_translate_zh.tex'): - yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt) + yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, + chatbot, history, system_prompt, mode='translate_zh', 
switch_prompt=_switch_prompt_) # <-------------- compile PDF -------------> - success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh', work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) # <-------------- zip PDF -------------> diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 308044f..58ac413 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -403,7 +403,7 @@ class LatexPaperSplit(): def __init__(self) -> None: self.nodes = None self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ - "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \ + "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" @@ -623,7 +623,7 @@ def compile_latex_with_timeout(command, timeout=60): return False return True -def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder): +def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'): import os, time current_dir = os.getcwd() n_fix = 1 @@ -634,6 +634,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f while True: import os + # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error 
{main_file_original}.tex'); os.chdir(current_dir) @@ -655,15 +656,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) - yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 - print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') - ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + if mode!='translate_zh': + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 + print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') + ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') - yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) - 
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) # <---------------------> os.chdir(current_dir) @@ -684,7 +686,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') if os.path.exists(pj(work_folder, '..', 'translation')): shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) - promote_file_to_downloadzone(result_pdf, chatbot) + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) return True # 成功啦 else: if n_fix>=max_try: break diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py index c638d1b..fed0f8f 100644 --- a/crazy_functions/对话历史存档.py +++ b/crazy_functions/对话历史存档.py @@ -1,4 +1,4 @@ -from toolbox import CatchException, update_ui +from toolbox import CatchException, update_ui, promote_file_to_downloadzone from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive import re @@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None): for h in history: f.write("\n>>>" + h) 
f.write('') - res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') - print(res) - return res + promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot) + return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}') def gen_file_preview(file_name): try: diff --git a/toolbox.py b/toolbox.py index ac49afc..ff936d6 100644 --- a/toolbox.py +++ b/toolbox.py @@ -444,8 +444,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): import shutil if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}' new_path = os.path.join(f'./gpt_log/', rename_file) - if os.path.exists(new_path): os.remove(new_path) - shutil.copyfile(file, new_path) + if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path) + if not os.path.exists(new_path): shutil.copyfile(file, new_path) if chatbot: chatbot._cookies.update({'file_to_promote': [new_path]}) From cb0bb6ab4a9b458118435220086bb60cea238416 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Wed, 21 Jun 2023 00:41:33 +1000 Subject: [PATCH 47/78] fix minor bugs --- crazy_functional.py | 22 ++++++------- crazy_functions/Latex输出PDF结果.py | 7 +++-- crazy_functions/crazy_utils.py | 48 +++++++++++++++++++++++++++++ crazy_functions/latex_utils.py | 26 ++++++++++++++++ toolbox.py | 12 +++++--- 5 files changed, 98 insertions(+), 17 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index 6ad2dc8..ded0698 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -368,17 +368,17 @@ def get_crazy_functions(): "Function": HotReload(Latex翻译中文并重新编译PDF) } }) - function_plugins.update({ - "本地论文翻译(上传Latex压缩包) [需Latex]": { - "Color": "stop", - "AsButton": False, - "AdvancedArgs": True, - "ArgsReminder": - "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ - "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". 
', - "Function": HotReload(Latex翻译中文并重新编译PDF) - } - }) + # function_plugins.update({ + # "本地论文翻译(上传Latex压缩包) [需Latex]": { + # "Color": "stop", + # "AsButton": False, + # "AdvancedArgs": True, + # "ArgsReminder": + # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', + # "Function": HotReload(Latex翻译中文并重新编译PDF) + # } + # }) except: print('Load function plugin failed') diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 214b00a..4f19967 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -108,6 +108,7 @@ def arxiv_download(chatbot, history, txt): return msg, None # <-------------- set format -------------> arxiv_id = url_.split('/abs/')[-1] + if 'v' in arxiv_id: arxiv_id = arxiv_id[:10] cached_translation_pdf = check_cached_translation_pdf(arxiv_id) if cached_translation_pdf: return cached_translation_pdf, arxiv_id @@ -190,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- zip PDF -------------> - zip_result(project_folder) + zip_res = zip_result(project_folder) if success: chatbot.append((f"成功啦", '请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) # <-------------- we are done -------------> return success @@ -272,13 +274,14 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) # <-------------- zip PDF -------------> - zip_result(project_folder) + zip_res = zip_result(project_folder) if success: chatbot.append((f"成功啦", 
'请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) # <-------------- we are done -------------> diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py index 96301ff..a1b1493 100644 --- a/crazy_functions/crazy_utils.py +++ b/crazy_functions/crazy_utils.py @@ -698,3 +698,51 @@ def try_install_deps(deps): for dep in deps: import subprocess, sys subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep]) + + +class construct_html(): + def __init__(self) -> None: + self.css = """ +.row { + display: flex; + flex-wrap: wrap; +} + +.column { + flex: 1; + padding: 10px; +} + +.table-header { + font-weight: bold; + border-bottom: 1px solid black; +} + +.table-row { + border-bottom: 1px solid lightgray; +} + +.table-cell { + padding: 5px; +} + """ + self.html_string = f'翻译结果' + + + def add_row(self, a, b): + tmp = """ +
+
REPLACE_A
+
REPLACE_B
+
+ """ + from toolbox import markdown_convertion + tmp = tmp.replace('REPLACE_A', markdown_convertion(a)) + tmp = tmp.replace('REPLACE_B', markdown_convertion(b)) + self.html_string += tmp + + + def save_file(self, file_name): + with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f: + f.write(self.html_string.encode('utf-8', 'ignore').decode()) + diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 58ac413..a1e7758 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -497,7 +497,32 @@ class LatexPaperFileGroup(): f.write(res) return manifest +def write_html(sp_file_contents, sp_file_result, chatbot): + # write html + try: + import copy + from .crazy_utils import construct_html + from toolbox import gen_time_str + ch = construct_html() + orig = "" + trans = "" + final = [] + for c,r in zip(sp_file_contents, sp_file_result): + final.append(c) + final.append(r) + for i, k in enumerate(final): + if i%2==0: + orig = k + if i%2==1: + trans = k + ch.add_row(a=orig, b=trans) + create_report_file_name = f"{gen_time_str()}.trans.html" + ch.save_file(create_report_file_name) + promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) + except: + from toolbox import trimmed_format_exc + print('writing html result failed:', trimmed_format_exc()) def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]): import time, os, re @@ -574,6 +599,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin pfg.get_token_num = None objdump(pfg, file=pj(project_folder,'temp.pkl')) + write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot) # <-------- 写出文件 ----------> msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" diff --git a/toolbox.py b/toolbox.py index ff936d6..fb6aa9f 100644 --- a/toolbox.py +++ b/toolbox.py @@ -6,6 +6,7 @@ 
import re import os from latex2mathml.converter import convert as tex2mathml from functools import wraps, lru_cache +pj = os.path.join """ ======================================================================== @@ -399,7 +400,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted rar archive to {}".format(dest_dir)) except: print("Rar format requires additional dependencies to install") - return '\n\n需要安装pip install rarfile来解压rar文件' + return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件' # 第三方库,需要预先pip install py7zr elif file_extension == '.7z': @@ -410,7 +411,7 @@ def extract_archive(file_path, dest_dir): print("Successfully extracted 7z archive to {}".format(dest_dir)) except: print("7z format requires additional dependencies to install") - return '\n\n需要安装pip install py7zr来解压7z文件' + return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件' else: return '' return '' @@ -447,7 +448,9 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path) if not os.path.exists(new_path): shutil.copyfile(file, new_path) if chatbot: - chatbot._cookies.update({'file_to_promote': [new_path]}) + if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote'] + else: current = [] + chatbot._cookies.update({'file_to_promote': [new_path] + current}) def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): """ @@ -802,7 +805,8 @@ def zip_result(folder): import time t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) zip_folder(folder, './gpt_log/', f'{t}-result.zip') - + return pj('./gpt_log/', f'{t}-result.zip') + def gen_time_str(): import time return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) From d7b056576d51945808dcb99733ec7931aedad5be Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Wed, 21 Jun 2023 00:52:58 +1000 Subject: [PATCH 48/78] add latex docker-compose --- docs/GithubAction+NoLocal+Latex | 25 
+++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 docs/GithubAction+NoLocal+Latex diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex new file mode 100644 index 0000000..5ff9bb8 --- /dev/null +++ b/docs/GithubAction+NoLocal+Latex @@ -0,0 +1,25 @@ +# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM +# - 1 修改 `config.py` +# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex . +# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex + +FROM fuqingxu/python311_texlive_ctex:latest + +# 指定路径 +WORKDIR /gpt + +RUN pip3 install gradio openai numpy arxiv rich +RUN pip3 install colorama Markdown pygments pymupdf + +# 装载项目文件 +COPY . . + + +# 安装依赖 +RUN pip3 install -r requirements.txt + +# 可选步骤,用于预热模块 +RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()' + +# 启动 +CMD ["python3", "-u", "main.py"] From 22a65cd1637e0d690c7db0326ddb2f5f312c0764 Mon Sep 17 00:00:00 2001 From: binary-husky <96192199+binary-husky@users.noreply.github.com> Date: Wed, 21 Jun 2023 00:55:24 +1000 Subject: [PATCH 49/78] Create build-with-latex.yml --- .github/workflows/build-with-latex.yml | 44 ++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 .github/workflows/build-with-latex.yml diff --git a/.github/workflows/build-with-latex.yml b/.github/workflows/build-with-latex.yml new file mode 100644 index 0000000..fb16d2c --- /dev/null +++ b/.github/workflows/build-with-latex.yml @@ -0,0 +1,44 @@ +# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages +name: Create and publish a Docker image for Latex support + +on: + push: + branches: + - 'master' + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }}_with_latex + +jobs: + build-and-push-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write 
+ + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v4 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + push: true + file: docs/GithubAction+NoLocal+Latex + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} From 1fede6df7fc182a355fac65fc4487e1b579d7be7 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Tue, 20 Jun 2023 23:05:17 +0800 Subject: [PATCH 50/78] temp --- crazy_functional.py | 5 +++-- crazy_functions/Latex输出PDF结果.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index d8ca9ae..abd44d7 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -119,6 +119,7 @@ def get_crazy_functions(): }, "[插件demo] 历史上的今天": { # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 + "AsButton": False, # 加入下拉菜单中 "Function": HotReload(高阶功能模板函数) }, @@ -358,9 +359,9 @@ def get_crazy_functions(): }) from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": { + "Arixv论文精细翻译": { "Color": "stop", - "AsButton": False, + "AsButton": True, # "AdvancedArgs": True, # "ArgsReminder": "", "Function": HotReload(Latex翻译中文并重新编译PDF) diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 2e9a30b..1d5e103 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -89,7 +89,7 @@ def arxiv_download(chatbot, history, txt): except ValueError: return False if ('.' 
in txt) and ('/' not in txt) and is_float(txt): - txt = 'https://arxiv.org/abs/' + txt + txt = 'https://arxiv.org/abs/' + txt.strip() if not txt.startswith('https://arxiv.org'): return txt, None From cf5f348d704cfadaeb7c86bdf43bfdc219f68a47 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Wed, 21 Jun 2023 11:20:31 +1000 Subject: [PATCH 51/78] update test samples --- crazy_functions/crazy_functions_test.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index e743878..b4ff5e2 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -189,6 +189,7 @@ def test_Latex(): # txt = r"https://arxiv.org/abs/2211.16068" # ACE # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE txt = r"https://arxiv.org/abs/2002.09253" + txt = r"https://arxiv.org/abs/2306.07831" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) @@ -217,6 +218,7 @@ def test_Latex(): # test_数学动画生成manim() # test_Langchain知识库() # test_Langchain知识库读取() -test_Latex() -input("程序完成,回车退出。") -print("退出。") \ No newline at end of file +if __name__ == "__main__": + test_Latex() + input("程序完成,回车退出。") + print("退出。") \ No newline at end of file From d87f1eb17133a31707152f84d37cf6e9d2e4e5dc Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Wed, 21 Jun 2023 11:38:59 +1000 Subject: [PATCH 52/78] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A5=E5=85=A5azure?= =?UTF-8?q?=E7=9A=84=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 11 ++-- config.py | 13 +++-- docs/use_azure.md | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 10 deletions(-) create mode 100644 docs/use_azure.md diff --git a/README.md b/README.md index 
7976076..7760260 100644 --- a/README.md +++ b/README.md @@ -186,16 +186,19 @@ docker-compose up 2. 使用docker-compose运行。 请阅读docker-compose.yml后,按照其中的提示操作即可 -3. 如何使用反代URL/微软云AzureAPI。 +3. 如何使用反代URL 按照`config.py`中的说明配置API_URL_REDIRECT即可。 -4. 远程云服务器部署(需要云服务器知识与经验)。 +4. 微软云AzureAPI +按照`config.py`中的说明配置即可(AZURE_ENDPOINT等四个配置) + +5. 远程云服务器部署(需要云服务器知识与经验)。 请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) -5. 使用WSL2(Windows Subsystem for Linux 子系统)。 +6. 使用WSL2(Windows Subsystem for Linux 子系统)。 请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) -6. 如何在二级网址(如`http://localhost/subpath`)下运行。 +7. 如何在二级网址(如`http://localhost/subpath`)下运行。 请访问[FastAPI运行说明](docs/WithFastapi.md) --- diff --git a/config.py b/config.py index cb26cbb..b173862 100644 --- a/config.py +++ b/config.py @@ -1,12 +1,6 @@ # [step 1]>> 例如: API_KEY = "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" (此key无效) API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2" -#增加关于AZURE的配置信息, 可以在AZURE网页中找到 -AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/" -AZURE_API_KEY = "填入azure openai api的密钥" -AZURE_API_VERSION = "填入api版本" -AZURE_ENGINE = "填入ENGINE" - # [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改 USE_PROXY = False @@ -88,3 +82,10 @@ your bing cookies here # 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md SLACK_CLAUDE_BOT_ID = '' SLACK_CLAUDE_USER_TOKEN = '' + + +# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md +AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/" +AZURE_API_KEY = "填入azure openai api的密钥" +AZURE_API_VERSION = "填入api版本" +AZURE_ENGINE = "填入ENGINE" diff --git a/docs/use_azure.md b/docs/use_azure.md new file mode 100644 index 0000000..626b132 --- /dev/null +++ b/docs/use_azure.md @@ -0,0 +1,143 @@ +# 通过微软Azure云服务申请 
Openai API + +由于Openai和微软的关系,现在是可以通过微软的Azure云计算服务直接访问openai的api,免去了注册和网络的问题。 + +快速入门的官方文档的链接是:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) + +# 申请API + +按文档中的“先决条件”的介绍,出了编程的环境以外,还需要以下三个条件: + +1.  Azure账号并创建订阅 + +2.  为订阅添加Azure OpenAI 服务 + +3.  部署模型 + +## Azure账号并创建订阅 + +### Azure账号 + +创建Azure的账号时最好是有微软的账号,这样似乎更容易获得免费额度(第一个月的200美元,实测了一下,如果用一个刚注册的微软账号登录Azure的话,并没有这一个月的免费额度)。 + +创建Azure账号的网址是:[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/) + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_944786_iH6AECuZ_tY0EaBd_1685327219?w=1327\&h=695\&type=image/png) + +打开网页后,点击 “免费开始使用” 会跳转到登录或注册页面,如果有微软的账户,直接登录即可,如果没有微软账户,那就需要到微软的网页再另行注册一个。 + +注意,Azure的页面和政策时不时会变化,已实际最新显示的为准就好。 + +### 创建订阅 + +注册好Azure后便可进入主页: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_444847_tk-9S-pxOYuaLs_K_1685327675?w=1865\&h=969\&type=image/png) + +首先需要在订阅里进行添加操作,点开后即可进入订阅的页面: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_612820_z_1AlaEgnJR-rUl0_1685327892?w=1865\&h=969\&type=image/png) + +第一次进来应该是空的,点添加即可创建新的订阅(可以是“免费”或者“即付即用”的订阅),其中订阅ID是后面申请Azure OpenAI需要使用的。 + +## 为订阅添加Azure OpenAI服务 + +之后回到首页,点Azure OpenAI即可进入OpenAI服务的页面(如果不显示的话,则在首页上方的搜索栏里搜索“openai”即可)。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_269759_nExkGcPC0EuAR5cp_1685328130?w=1865\&h=969\&type=image/png) + +不过现在这个服务还不能用。在使用前,还需要在这个网址申请一下: + +[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu) + +这里有二十来个问题,按照要求和自己的实际情况填写即可。 + +其中需要注意的是 + +1.  千万记得填对"订阅ID" + +2.  
需要填一个公司邮箱(可以不是注册用的邮箱)和公司网址 + +之后,在回到上面那个页面,点创建,就会进入创建页面了: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_72708_9d9JYhylPVz3dFWL_1685328372?w=824\&h=590\&type=image/png) + +需要填入“资源组”和“名称”,按照自己的需要填入即可。 + +完成后,在主页的“资源”里就可以看到刚才创建的“资源”了,点击进入后,就可以进行最后的部署了。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_871541_CGCnbgtV9Uk1Jccy_1685329861?w=1217\&h=628\&type=image/png) + +## 部署模型 + +进入资源页面后,在部署模型前,可以先点击“开发”,把密钥和终结点记下来。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_852567_dxCZOrkMlWDSLH0d_1685330736?w=856\&h=568\&type=image/png) + +之后,就可以去部署模型了,点击“部署”即可,会跳转到 Azure OpenAI Stuido 进行下面的操作: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_169225_uWs1gMhpNbnwW4h2_1685329901?w=1865\&h=969\&type=image/png) + +进入 Azure OpenAi Studio 后,点击新建部署,会弹出如下对话框: + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_391255_iXUSZAzoud5qlxjJ_1685330224?w=656\&h=641\&type=image/png) + +在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。 + +![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_724099_vBaHcUilsm1EtPgK_1685330396?w=1869\&h=482\&type=image/png) + +这个部署名需要记下来。 + +到现在为止,申请操作就完成了,需要记下来的有下面几个东西: + +● 密钥(1或2都可以) + +● 终结点 + +● 部署名(不是模型名) + +# API的使用 + +接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) + +和openai自己的api调用有点类似,都需要安装openai库,不同的是调用方式 + +``` +import openai +openai.api_type = "azure" #固定格式,无需修改 +openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点” +openai.api_version = "2023-05-15" #固定格式,无需修改 +openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2” + +response = openai.ChatCompletion.create( + engine="gpt-35-turbo", #这里填入的不是模型名,是部署名 + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"}, + {"role": "assistant", "content": "Yes, customer managed keys are supported by 
Azure OpenAI."}, + {"role": "user", "content": "Do other Azure Cognitive Services support this too?"} + ] +) + +print(response) +print(response['choices'][0]['message']['content']) + +``` + +需要注意的是: + +1.  engine那里填入的是部署名,不是模型名 + +2.  通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同,不需要 decode,已经是解析好的 json 了,直接根据键值读取即可。 + +更细节的使用方法,详见官方API文档。 + +# 关于费用 + +Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期),费用如下: + +![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44) + +具体可以可以看这个网址 :[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable) + +并非网上说的什么“一年白嫖”,但注册方法以及网络问题都比直接使用openai的api要简单一些。 From cd389499035e2e2684063da6c9b8c5b24002fdfb Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Wed, 21 Jun 2023 11:53:57 +1000 Subject: [PATCH 53/78] =?UTF-8?q?=E5=BD=93=E9=81=87=E5=88=B0=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E6=97=B6=EF=BC=8C=E5=9B=9E=E6=BB=9A=E5=88=B0=E5=8E=9F?= =?UTF-8?q?=E6=96=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/latex_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index a1e7758..48df10b 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -212,6 +212,8 @@ def fix_content(final_tex, node_string): final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex) final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex) + if "Traceback" in final_tex and "[Local Message]" in final_tex: + final_tex = node_string # 出问题了,还原原文 if node_string.count('\\begin') != final_tex.count('\\begin'): final_tex = node_string # 出问题了,还原原文 if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'): From 74941170aaec1838fbc0e99963588458addcc9b8 Mon Sep 17 00:00:00 2001 From: Ranhuiryan Date: Wed, 21 Jun 2023 
16:19:26 +0800 Subject: [PATCH 54/78] update azure use instruction --- docs/use_azure.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/use_azure.md b/docs/use_azure.md index 626b132..f1c27ef 100644 --- a/docs/use_azure.md +++ b/docs/use_azure.md @@ -96,6 +96,15 @@ ● 部署名(不是模型名) +# 修改 config.py + +``` +AZURE_ENDPOINT = "填入终结点" +AZURE_API_KEY = "填入azure openai api的密钥" +AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改 +AZURE_ENGINE = "填入部署名" + +``` # API的使用 接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) From 33d2e75aac8063f9d8fe615599fccf948b48424e Mon Sep 17 00:00:00 2001 From: Ranhuiryan Date: Wed, 21 Jun 2023 16:19:49 +0800 Subject: [PATCH 55/78] add azure-gpt35 to model list --- config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.py b/config.py index b173862..557b4e9 100644 --- a/config.py +++ b/config.py @@ -47,7 +47,7 @@ MAX_RETRY = 2 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ -AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] +AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] # P.S. 
其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # 本地LLM模型如ChatGLM的执行方式 CPU/GPU From d841d13b047207fc15e277601ab1140e33988a9e Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Sun, 25 Jun 2023 22:12:44 +0800 Subject: [PATCH 56/78] add arxiv translation test samples --- crazy_functions/crazy_functions_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index b4ff5e2..6e17fb3 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -188,8 +188,13 @@ def test_Latex(): # txt = r"https://arxiv.org/abs/2305.17608" # txt = r"https://arxiv.org/abs/2211.16068" # ACE # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE - txt = r"https://arxiv.org/abs/2002.09253" - txt = r"https://arxiv.org/abs/2306.07831" + # txt = r"https://arxiv.org/abs/2002.09253" + # txt = r"https://arxiv.org/abs/2306.07831" + # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result" + # txt = r"https://arxiv.org/abs/2212.10156" + txt = r"https://arxiv.org/abs/2211.11559" + + for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): cli_printer.print(cb) # print(cb) From b8560b75101437f7ab13e478c63d6a412d815790 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Sun, 25 Jun 2023 22:46:16 +0800 Subject: [PATCH 57/78] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=AF=AF=E5=88=A4latex?= =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E6=96=87=E4=BB=B6=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_functions_test.py | 1 - crazy_functions/latex_utils.py | 28 ++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 6e17fb3..7edd04f 
100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -190,7 +190,6 @@ def test_Latex(): # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE # txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2306.07831" - # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result" # txt = r"https://arxiv.org/abs/2212.10156" txt = r"https://arxiv.org/abs/2211.11559" diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 48df10b..def4be2 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -110,19 +110,41 @@ Latex Merge File def 寻找Latex主文件(file_manifest, mode): """ 在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。 - P.S. 但愿没人把latex模板放在里面传进来 + P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码) """ + canidates = [] for texf in file_manifest: if os.path.basename(texf).startswith('merge'): continue with open(texf, 'r', encoding='utf8') as f: file_content = f.read() if r'\documentclass' in file_content: - return texf + canidates.append(texf) else: continue - raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + if len(canidates) == 0: + raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)') + elif len(canidates) == 1: + return canidates[0] + else: # if len(canidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回 + canidates_score = [] + # 给出一些判定模板文档的词作为扣分项 + unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers'] + expected_words = ['\input', '\ref', '\cite'] + for texf in canidates: + canidates_score.append(0) + with open(texf, 'r', encoding='utf8') as f: + file_content = f.read() + for uw in unexpected_words: + if uw in file_content: + canidates_score[-1] -= 1 + for uw in expected_words: + if uw in file_content: + canidates_score[-1] += 1 + select = np.argmax(canidates_score) # 取评分最高者返回 + return canidates[select] + def rm_comments(main_file): new_file_remove_comment_lines = 
[] for l in main_file.splitlines(): From 9f0cf9fb2b3546e13a94f6cb9d6e0fa44eaffad9 Mon Sep 17 00:00:00 2001 From: 505030475 <505030475@qq.com> Date: Sun, 25 Jun 2023 23:30:31 +0800 Subject: [PATCH 58/78] =?UTF-8?q?arxiv=20PDF=20=E5=BC=95=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_functions_test.py | 3 ++- crazy_functions/latex_utils.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 7edd04f..3ef555d 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -191,7 +191,8 @@ def test_Latex(): # txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2306.07831" # txt = r"https://arxiv.org/abs/2212.10156" - txt = r"https://arxiv.org/abs/2211.11559" + # txt = r"https://arxiv.org/abs/2211.11559" + txt = r"https://arxiv.org/abs/2303.08774" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index def4be2..3e4f37c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -314,6 +314,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL) text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL) + text, mask = split_worker(text, mask, r"\\includepdf\[(.*?)\]\{(.*?)\}") text, mask = split_worker(text, mask, r"\\item ") text, mask = split_worker(text, mask, r"\\label\{(.*?)\}") text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}") From 280e14d7b7794a2e94193d553e8bd271dd0fd3f7 Mon Sep 17 00:00:00 2001 From: 
binary-husky Date: Mon, 26 Jun 2023 09:59:14 +0800 Subject: [PATCH 59/78] =?UTF-8?q?=E6=9B=B4=E6=96=B0Latex=E6=A8=A1=E5=9D=97?= =?UTF-8?q?=E7=9A=84docker-compose?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 19 ++++++------------- docker-compose.yml | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 7760260..b8b76c9 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ cd gpt_academic 2. 配置API_KEY -在`config.py`中,配置API KEY等设置,[特殊网络环境设置](https://github.com/binary-husky/gpt_academic/issues/1) 。 +在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。 (P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。因此,如果您能理解我们的配置读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中。`config_private.py`不受git管控,可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项,环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`) @@ -140,15 +140,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt- python main.py ``` -5. 测试函数插件 -``` -- 测试函数插件模板函数(要求gpt回答历史上的今天发生了什么),您可以根据此函数为模板,实现更复杂的功能 - 点击 "[函数插件模板Demo] 历史上的今天" -``` - ## 安装-方法2:使用Docker -1. 仅ChatGPT(推荐大多数人选择) +1. 仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1) ``` sh git clone https://github.com/binary-husky/gpt_academic.git # 下载项目 @@ -161,26 +155,25 @@ docker run --rm -it --net=host gpt-academic #(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口 docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic ``` -P.S. 如果需要依赖Latex的插件功能,请见Wiki +P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。 2. 
ChatGPT + ChatGLM + MOSS(需要熟悉Docker) ``` sh -# 修改docker-compose.yml,删除方案1和方案3,保留方案2。修改docker-compose.yml中方案2的配置,参考其中注释即可 +# 修改docker-compose.yml,保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置,参考其中注释即可 docker-compose up ``` 3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker) ``` sh -# 修改docker-compose.yml,删除方案1和方案2,保留方案3。修改docker-compose.yml中方案3的配置,参考其中注释即可 +# 修改docker-compose.yml,保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置,参考其中注释即可 docker-compose up ``` ## 安装-方法3:其他部署姿势 1. 一键运行脚本。 -完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本, -不建议电脑上已有python的用户采用此方法(在此基础上安装插件的依赖很麻烦)。 +完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。 脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。 2. 使用docker-compose运行。 diff --git a/docker-compose.yml b/docker-compose.yml index 07f1c9f..0a0dcda 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -103,3 +103,30 @@ services: echo '[jittorllms] 正在从github拉取最新代码...' 
&& git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force && python3 -u main.py" + + +## =================================================== +## 【方案四】 chatgpt + Latex +## =================================================== +version: '3' +services: + gpt_academic_with_latex: + image: ghcr.io/binary-husky/gpt_academic_with_latex:master + environment: + # 请查阅 `config.py` 以查看所有的配置信息 + API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx ' + USE_PROXY: ' True ' + proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } ' + LLM_MODEL: ' gpt-3.5-turbo ' + AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "gpt-4"] ' + LOCAL_MODEL_DEVICE: ' cuda ' + DEFAULT_WORKER_NUM: ' 10 ' + WEB_PORT: ' 12303 ' + + # 与宿主的网络融合 + network_mode: "host" + + # 不使用代理网络拉取最新代码 + command: > + bash -c "python3 -u main.py" + From 4290821a504ec2996241c09b262653111c7208b8 Mon Sep 17 00:00:00 2001 From: Xminry <46775500+Xminry@users.noreply.github.com> Date: Tue, 27 Jun 2023 01:57:31 +0800 Subject: [PATCH 60/78] =?UTF-8?q?Update=20=E7=90=86=E8=A7=A3PDF=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E5=86=85=E5=AE=B9.py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/理解PDF文档内容.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py index 5050864..f1a89a7 100644 --- a/crazy_functions/理解PDF文档内容.py +++ b/crazy_functions/理解PDF文档内容.py @@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro # 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割) # 的长度必须小于 2500 个 Token file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF - + file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars + TOKEN_LIMIT_PER_FRAGMENT = 2500 from 
.crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf From f654c1af317ab6fccb40b0097800690a786d8d5d Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Tue, 27 Jun 2023 18:59:56 +0800 Subject: [PATCH 61/78] merge regex expressions --- crazy_functions/crazy_functions_test.py | 6 +- crazy_functions/latex_utils.py | 74 ++++++++++++------------- 2 files changed, 39 insertions(+), 41 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 3ef555d..f2d3969 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -190,9 +190,11 @@ def test_Latex(): # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE # txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2306.07831" - # txt = r"https://arxiv.org/abs/2212.10156" + txt = r"https://arxiv.org/abs/2212.10156" # txt = r"https://arxiv.org/abs/2211.11559" - txt = r"https://arxiv.org/abs/2303.08774" + # txt = r"https://arxiv.org/abs/2303.08774" + # txt = r"https://arxiv.org/abs/2303.12712" + # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index a7eb9f2..83c4401 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -21,6 +21,7 @@ def set_forbidden_text(text, mask, pattern, flags=0): you can mask out (mask = PRESERVE so that text become untouchable for GPT) everything between "\begin{equation}" and "\end{equation}" """ + if isinstance(pattern, list): pattern = '|'.join(pattern) pattern_compile = re.compile(pattern, flags) for res in pattern_compile.finditer(text): mask[res.span()[0]:res.span()[1]] = PRESERVE @@ -46,7 +47,7 @@ def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): mask[begin:end] = PRESERVE return text, 
mask -def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0): +def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True): """ Move area out of preserve area (make text editable for GPT) count the number of the braces so as to catch compelete text area. @@ -64,6 +65,9 @@ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0): p += 1 end = p mask[begin:end] = TRANSFORM + if forbid_wrapper: + mask[res.regs[0][0]:begin] = PRESERVE + mask[end:res.regs[0][1]] = PRESERVE return text, mask def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42): @@ -163,6 +167,7 @@ def rm_comments(main_file): else: new_file_remove_comment_lines.append(l) main_file = '\n'.join(new_file_remove_comment_lines) + # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令 main_file = re.sub(r'(? Date: Tue, 27 Jun 2023 19:16:05 +0800 Subject: [PATCH 62/78] add `item` breaker --- crazy_functions/latex_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 83c4401..49f547c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -302,7 +302,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL) # 吸收iffalse注释 text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL) - # 吸收在25行以内的begin-end组合 + # 吸收在42行以内的begin-end组合 text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42) # 吸收匿名公式 text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$", r"\\\[.*?\\\]" ], re.DOTALL) @@ -321,7 +321,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], 
re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL) text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"]) - text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}"]) + text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "]) text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL) # reverse 操作必须放在最后 text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True) From 99cf7205c3059caaae0fa46f1739d602a95e1bf5 Mon Sep 17 00:00:00 2001 From: Xminry Date: Wed, 28 Jun 2023 10:30:08 +0800 Subject: [PATCH 63/78] =?UTF-8?q?feat:=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2?= =?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=8Ccn.bing.com=E7=89=88=EF=BC=8C?= =?UTF-8?q?=E5=9B=BD=E5=86=85=E5=8F=AF=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 12 +++ crazy_functions/联网的ChatGPT_bing版.py | 102 ++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 crazy_functions/联网的ChatGPT_bing版.py diff --git a/crazy_functional.py b/crazy_functional.py index a724b97..aea97a6 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -235,6 +235,18 @@ def get_crazy_functions(): except: print('Load function plugin failed') + try: + from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题 + function_plugins.update({ + "连接网络回答问题_bing搜索(先输入问题,再点击按钮,搜索引擎为cn.bing.com,国内可用)": { + "Color": "stop", + "AsButton": False, # 加入下拉菜单中 + "Function": HotReload(连接bing搜索回答问题) + } + }) + except: + print('Load function plugin 
failed') + try: from crazy_functions.解析项目源代码 import 解析任意code项目 function_plugins.update({ diff --git a/crazy_functions/联网的ChatGPT_bing版.py b/crazy_functions/联网的ChatGPT_bing版.py new file mode 100644 index 0000000..93a84a0 --- /dev/null +++ b/crazy_functions/联网的ChatGPT_bing版.py @@ -0,0 +1,102 @@ +from toolbox import CatchException, update_ui +from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping +import requests +from bs4 import BeautifulSoup +from request_llm.bridge_all import model_info + + +def bing_search(query, proxies=None): + query = query + url = f"https://cn.bing.com/search?q={query}" + headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'} + response = requests.get(url, headers=headers, proxies=proxies) + soup = BeautifulSoup(response.content, 'html.parser') + results = [] + for g in soup.find_all('li', class_='b_algo'): + anchors = g.find_all('a') + if anchors: + link = anchors[0]['href'] + if not link.startswith('http'): + continue + title = g.find('h2').text + item = {'title': title, 'link': link} + results.append(item) + + for r in results: + print(r['link']) + return results + + +def scrape_text(url, proxies) -> str: + """Scrape text from a webpage + + Args: + url (str): The URL to scrape text from + + Returns: + str: The scraped text + """ + headers = { + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36', + 'Content-Type': 'text/plain', + } + try: + response = requests.get(url, headers=headers, proxies=proxies, timeout=8) + if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding + except: + return "无法连接到该网页" + soup = BeautifulSoup(response.text, "html.parser") + for script in soup(["script", "style"]): + script.extract() + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = 
(phrase.strip() for line in lines for phrase in line.split(" ")) + text = "\n".join(chunk for chunk in chunks if chunk) + return text + +@CatchException +def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): + """ + txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径 + llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行 + plugin_kwargs 插件模型的参数,暂时没有用武之地 + chatbot 聊天显示框的句柄,用于显示给用户 + history 聊天历史,前情提要 + system_prompt 给gpt的静默提醒 + web_port 当前软件运行的端口号 + """ + history = [] # 清空历史,以免输入溢出 + chatbot.append((f"请结合互联网信息回答以下问题:{txt}", + "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!")) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + # ------------- < 第1步:爬取搜索引擎的结果 > ------------- + from toolbox import get_conf + proxies, = get_conf('proxies') + urls = bing_search(txt, proxies) + history = [] + + # ------------- < 第2步:依次访问网页 > ------------- + max_search_result = 8 # 最多收纳多少个网页的结果 + for index, url in enumerate(urls[:max_search_result]): + res = scrape_text(url['link'], proxies) + history.extend([f"第{index}份搜索结果:", res]) + chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"]) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新 + + # ------------- < 第3步:ChatGPT综合 > ------------- + i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}" + i_say, history = input_clipping( # 裁剪输入,从最长的条目开始裁剪,防止爆token + inputs=i_say, + history=history, + max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4 + ) + gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( + inputs=i_say, inputs_show_user=i_say, + llm_kwargs=llm_kwargs, chatbot=chatbot, history=history, + sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。" + ) + chatbot[-1] = (i_say, gpt_say) + history.append(i_say);history.append(gpt_say) + yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 + From 
eb4c07997ece2efe35fce63b8bb7c36b6179342a Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 29 Jun 2023 11:30:42 +0800 Subject: [PATCH 64/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLatex=E7=9F=AB=E9=94=99?= =?UTF-8?q?=E5=92=8C=E6=9C=AC=E5=9C=B0Latex=E8=AE=BA=E6=96=87=E7=BF=BB?= =?UTF-8?q?=E8=AF=91=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 30 ++++++++++++------------- crazy_functions/Latex输出PDF结果.py | 24 +++++++++++++++----- crazy_functions/crazy_functions_test.py | 4 ++-- crazy_functions/latex_utils.py | 10 +++++---- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index a724b97..7f8c41e 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -348,17 +348,28 @@ def get_crazy_functions(): try: from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 function_plugins.update({ - "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": { + "Latex英文纠错+高亮修正位置 [需Latex]": { "Color": "stop", "AsButton": False, - # "AdvancedArgs": True, - # "ArgsReminder": "", + "AdvancedArgs": True, + "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。", "Function": HotReload(Latex英文纠错加PDF对比) } }) from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF function_plugins.update({ - "Arixv翻译(输入arxivID) [需Latex]": { + "Arixv翻译(输入arxivID)[需Latex]": { + "Color": "stop", + "AsButton": False, + "AdvancedArgs": True, + "ArgsReminder": + "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ + "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". 
', + "Function": HotReload(Latex翻译中文并重新编译PDF) + } + }) + function_plugins.update({ + "本地论文翻译(上传Latex压缩包)[需Latex]": { "Color": "stop", "AsButton": False, "AdvancedArgs": True, @@ -368,17 +379,6 @@ def get_crazy_functions(): "Function": HotReload(Latex翻译中文并重新编译PDF) } }) - # function_plugins.update({ - # "本地论文翻译(上传Latex压缩包) [需Latex]": { - # "Color": "stop", - # "AsButton": False, - # "AdvancedArgs": True, - # "ArgsReminder": - # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ - # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', - # "Function": HotReload(Latex翻译中文并重新编译PDF) - # } - # }) except: print('Load function plugin failed') diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py index 1886375..810d802 100644 --- a/crazy_functions/Latex输出PDF结果.py +++ b/crazy_functions/Latex输出PDF结果.py @@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement): - sys_prompt_array: A list of strings containing prompts for system prompts. """ n_split = len(pfg.sp_file_contents) - if mode == 'proofread': + if mode == 'proofread_en': inputs_array = [r"Below is a section from an academic paper, proofread this section." + - r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement + r"Answer me only with the revised text:" + f"\n\n{frag}" for frag in pfg.sp_file_contents] sys_prompt_array = ["You are a professional academic paper writer." 
for _ in range(n_split)] @@ -70,6 +70,12 @@ def move_project(project_folder, arxiv_id=None): shutil.rmtree(new_workfolder) except: pass + + # align subfolder if there is a folder wrapper + items = glob.glob(pj(project_folder,'*')) + if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1: + if os.path.isdir(items[0]): project_folder = items[0] + shutil.copytree(src=project_folder, dst=new_workfolder) return new_workfolder @@ -141,7 +147,11 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo chatbot.append([ "函数插件功能?", "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"]) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 - + + # <-------------- more requirements -------------> + if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg") + more_req = plugin_kwargs.get("advanced_arg", "") + _switch_prompt_ = partial(switch_prompt, more_requirement=more_req) # <-------------- check deps -------------> try: @@ -180,13 +190,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo # <-------------- if merge_translate_zh is already generated, skip gpt req -------------> - if not os.path.exists(project_folder + '/merge_proofread.tex'): + if not os.path.exists(project_folder + '/merge_proofread_en.tex'): yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, - chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) + chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_) # <-------------- compile PDF -------------> - success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', + success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en', 
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) @@ -195,6 +205,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo if success: chatbot.append((f"成功啦", '请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 @@ -278,6 +289,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, if success: chatbot.append((f"成功啦", '请查收结果(压缩包)...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 + promote_file_to_downloadzone(file=zip_res, chatbot=chatbot) else: chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index f2d3969..0c623b8 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -190,10 +190,10 @@ def test_Latex(): # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE # txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2306.07831" - txt = r"https://arxiv.org/abs/2212.10156" + # txt = r"https://arxiv.org/abs/2212.10156" # txt = r"https://arxiv.org/abs/2211.11559" # txt = r"https://arxiv.org/abs/2303.08774" - # txt = r"https://arxiv.org/abs/2303.12712" + txt = r"https://arxiv.org/abs/2303.12712" # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 49f547c..a38405c 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -430,7 +430,7 @@ class 
LatexPaperSplit(): """ def __init__(self) -> None: self.nodes = None - self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ + self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) @@ -741,13 +741,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f results_ += f"对比PDF编译是否成功: {diff_pdf_success};" yield from update_ui_lastest_msg(f'第{n_fix}编译结束:
{results_}...', chatbot, history) # 刷新Gradio前端界面 + if diff_pdf_success: + result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI if modified_pdf_success: yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(current_dir) - result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') + result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path if os.path.exists(pj(work_folder, '..', 'translation')): shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) - promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) + promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI return True # 成功啦 else: if n_fix>=max_try: break From 64f76e7401a099cffc2e177835bdb4d30891062d Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 29 Jun 2023 11:32:19 +0800 Subject: [PATCH 65/78] 3.42 --- version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version b/version index ceb909a..6353b34 100644 --- a/version +++ b/version @@ -1,5 +1,5 @@ { - "version": 3.41, + "version": 3.42, "show_feature": true, - "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" + "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" } From 1a0009301548d9ccbaaaa0ed33fdfb62c76465b8 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 29 Jun 2023 12:15:52 +0800 Subject: [PATCH 66/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
--- crazy_functions/latex_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index a38405c..8b41fc9 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -430,7 +430,7 @@ class LatexPaperSplit(): """ def __init__(self) -> None: self.nodes = None - self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成," + \ + self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者) From 49253c4dc6393b68e08a0657011aad4c36fd7957 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 29 Jun 2023 12:29:49 +0800 Subject: [PATCH 67/78] [arxiv trans] add html comparison to zip file --- crazy_functions/latex_utils.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 8b41fc9..69f05ff 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -532,11 +532,11 @@ class LatexPaperFileGroup(): f.write(res) return manifest -def write_html(sp_file_contents, sp_file_result, chatbot): +def write_html(sp_file_contents, sp_file_result, chatbot, project_folder): # write html try: - import copy + import shutil from .crazy_utils import construct_html from toolbox import gen_time_str ch = construct_html() @@ -554,6 +554,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot): ch.add_row(a=orig, b=trans) create_report_file_name = f"{gen_time_str()}.trans.html" ch.save_file(create_report_file_name) + shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name)) promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) except: from toolbox import 
trimmed_format_exc @@ -634,7 +635,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin pfg.get_token_num = None objdump(pfg, file=pj(project_folder,'temp.pkl')) - write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot) + write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder) # <-------- 写出文件 ----------> msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。" From aced272d3c3d4c3b3fd250b6c97c574cd95b30f8 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Thu, 29 Jun 2023 12:43:50 +0800 Subject: [PATCH 68/78] =?UTF-8?q?=E5=BE=AE=E8=B0=83=E6=8F=92=E4=BB=B6?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functional.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/crazy_functional.py b/crazy_functional.py index ec3235e..03aaaf5 100644 --- a/crazy_functional.py +++ b/crazy_functional.py @@ -226,19 +226,15 @@ def get_crazy_functions(): try: from crazy_functions.联网的ChatGPT import 连接网络回答问题 function_plugins.update({ - "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": { + "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": { "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Function": HotReload(连接网络回答问题) } }) - except: - print('Load function plugin failed') - - try: from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题 function_plugins.update({ - "连接网络回答问题_bing搜索(先输入问题,再点击按钮,搜索引擎为cn.bing.com,国内可用)": { + "连接网络回答问题(中文Bing版,输入问题后点击该插件)": { "Color": "stop", "AsButton": False, # 加入下拉菜单中 "Function": HotReload(连接bing搜索回答问题) From 3b78e0538b8890d7eefa8858948117be8d4da3e1 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 29 Jun 2023 14:52:58 +0800 Subject: [PATCH 69/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=92=E4=BB=B6demo?= =?UTF-8?q?=E7=9A=84=E5=9B=BE=E5=83=8F=E6=98=BE=E7=A4=BA=E7=9A=84=E9=97=AE?= =?UTF-8?q?=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- crazy_functions/高级功能函数模板.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py index 7c6a7ff..73ae45f 100644 --- a/crazy_functions/高级功能函数模板.py +++ b/crazy_functions/高级功能函数模板.py @@ -1,6 +1,7 @@ from toolbox import CatchException, update_ui from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive -import datetime +import datetime, re + @CatchException def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): """ @@ -18,12 +19,34 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s for i in range(5): currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day - i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。' + i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式:{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。' gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( inputs=i_say, inputs_show_user=i_say, llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], - sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? 
< PUT_YOUR_QUERY_HERE >)。" + sys_prompt='输出格式示例:1908年,美国消防救援事业发展的“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。' ) + gpt_say = get_images(gpt_say) chatbot[-1] = (i_say, gpt_say) history.append(i_say);history.append(gpt_say) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 + + +def get_images(gpt_say): + def get_image_by_keyword(keyword): + import requests + from bs4 import BeautifulSoup + response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2) + for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"): + if "data-src" in image_element: break + return image_element["data-src"] + + for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say): + keywords = [n.strip('"') for n in keywords.split(',')] + try: + description = keywords[0] + url = get_image_by_keyword(keywords[0]) + img_tag = f"\n\n![{description}]({url})" + gpt_say += img_tag + except: + continue + return gpt_say \ No newline at end of file From 37172906ef5a697d2ef3ee272147a27dd67ae138 Mon Sep 17 00:00:00 2001 From: binary-husky Date: Thu, 29 Jun 2023 14:55:55 +0800 Subject: [PATCH 70/78] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=AF=BC=E5=87=BA=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- toolbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolbox.py b/toolbox.py index fb6aa9f..256d99c 100644 --- a/toolbox.py +++ b/toolbox.py @@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot): else: report_files = find_recent_files('gpt_log') if len(report_files) == 0: - return None, chatbot + return cookies, None, chatbot # files.extend(report_files) file_links = '' for f in report_files: file_links += f'
{f}' From 22f377e2fb6bf45c2a0447c0680ee0a1eba8f6d7 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 30 Jun 2023 11:05:47 +0800 Subject: [PATCH 71/78] fix multi user cwd shift --- crazy_functions/crazy_functions_test.py | 3 +- crazy_functions/latex_utils.py | 51 ++++++++++++++++--------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py index 0c623b8..60b6b87 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -193,8 +193,9 @@ def test_Latex(): # txt = r"https://arxiv.org/abs/2212.10156" # txt = r"https://arxiv.org/abs/2211.11559" # txt = r"https://arxiv.org/abs/2303.08774" - txt = r"https://arxiv.org/abs/2303.12712" + # txt = r"https://arxiv.org/abs/2303.12712" # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" + txt = r"C:\Users\fuqingxu\Desktop\9" for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index 69f05ff..eb65a8a 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0): mask[res.span()[0]:res.span()[1]] = PRESERVE return text, mask +def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True): + """ + Move area out of preserve area (make text editable for GPT) + count the number of the braces so as to catch compelete text area. + e.g. + \begin{abstract} blablablablablabla. 
\end{abstract} + """ + if isinstance(pattern, list): pattern = '|'.join(pattern) + pattern_compile = re.compile(pattern, flags) + for res in pattern_compile.finditer(text): + if not forbid_wrapper: + mask[res.span()[0]:res.span()[1]] = TRANSFORM + else: + mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}' + mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract + mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract + return text, mask + def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): """ Add a preserve text area in this paper (text become untouchable for GPT). @@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts): # reverse 操作必须放在最后 text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True) text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True) + text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True) root = convert_to_linklist(text, mask) # 修复括号 @@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") return False, -1, [-1] - -def compile_latex_with_timeout(command, timeout=60): +def compile_latex_with_timeout(command, cwd, timeout=60): import subprocess - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd) try: stdout, stderr = process.communicate(timeout=timeout) except subprocess.TimeoutExpired: @@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f # 
https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): # 只有第二步成功,才能继续下面的步骤 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面 if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')): - os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original) if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')): - os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified) yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); 
os.chdir(current_dir) - os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) - os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) - os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified) if mode!='translate_zh': yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 @@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode 
-file-line-error merge_diff.tex'); os.chdir(current_dir) - os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) + ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) + ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder) - # <---------------------> - os.chdir(current_dir) # <---------- 检查结果 -----------> results_ = "" @@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面 if not can_retry: break - os.chdir(current_dir) return False # 失败啦 From 403667aec18cba2d9fb719afa946168f3907124f Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 30 Jun 2023 12:06:28 +0800 Subject: [PATCH 72/78] upgrade chatglm to chatglm2 --- request_llm/bridge_all.py | 11 ++++++++++- request_llm/bridge_chatglm.py | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py index 02cfe98..d33f161 100644 --- a/request_llm/bridge_all.py +++ b/request_llm/bridge_all.py @@ -152,7 +152,7 @@ model_info = { "token_cnt": get_token_num_gpt4, }, - # chatglm + # 将 chatglm 直接对齐到 chatglm2 "chatglm": { "fn_with_ui": chatglm_ui, "fn_without_ui": chatglm_noui, @@ -161,6 +161,15 @@ model_info = { "tokenizer": tokenizer_gpt35, "token_cnt": get_token_num_gpt35, }, + "chatglm2": { + "fn_with_ui": chatglm_ui, + "fn_without_ui": chatglm_noui, + "endpoint": None, + "max_token": 1024, + "tokenizer": tokenizer_gpt35, + "token_cnt": get_token_num_gpt35, + }, + # newbing 
"newbing": { "fn_with_ui": newbing_ui, diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py index 100783d..deaacd2 100644 --- a/request_llm/bridge_chatglm.py +++ b/request_llm/bridge_chatglm.py @@ -40,12 +40,12 @@ class GetGLMHandle(Process): while True: try: if self.chatglm_model is None: - self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) + self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True) device, = get_conf('LOCAL_MODEL_DEVICE') if device=='cpu': - self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() + self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float() else: - self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() + self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda() self.chatglm_model = self.chatglm_model.eval() break else: From ecb08e69be5b39a206b742c365379286260ecabe Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Fri, 30 Jun 2023 13:08:54 +0800 Subject: [PATCH 73/78] remove find picture core functionality --- core_functional.py | 2 +- crazy_functions/批量翻译PDF文档_多线程.py | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/core_functional.py b/core_functional.py index e126b57..7bc3582 100644 --- a/core_functional.py +++ b/core_functional.py @@ -63,6 +63,7 @@ def get_core_functions(): "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL," + r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n", "Suffix": r"", + "Visible": False, }, "解释代码": { "Prefix": r"请解释以下代码:" + "\n```\n", @@ -73,6 +74,5 @@ def get_core_functions(): r"Note that, reference styles maybe more than one kind, you should transform each item correctly." 
+ r"Items need to be transformed:", "Suffix": r"", - "Visible": False, } } diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py index 06d8a5a..0adac96 100644 --- a/crazy_functions/批量翻译PDF文档_多线程.py +++ b/crazy_functions/批量翻译PDF文档_多线程.py @@ -1,5 +1,5 @@ from toolbox import CatchException, report_execption, write_results_to_file -from toolbox import update_ui +from toolbox import update_ui, promote_file_to_downloadzone from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from .crazy_utils import read_and_clean_pdf_text @@ -147,23 +147,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, print('writing html result failed:', trimmed_format_exc()) # 准备文件的下载 - import shutil for pdf_path in generated_conclusion_files: # 重命名文件 - rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}' - if os.path.exists(rename_file): - os.remove(rename_file) - shutil.copyfile(pdf_path, rename_file) - if os.path.exists(pdf_path): - os.remove(pdf_path) + rename_file = f'翻译-{os.path.basename(pdf_path)}' + promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot) for html_path in generated_html_files: # 重命名文件 - rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}' - if os.path.exists(rename_file): - os.remove(rename_file) - shutil.copyfile(html_path, rename_file) - if os.path.exists(html_path): - os.remove(html_path) + rename_file = f'翻译-{os.path.basename(html_path)}' + promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot) chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files))) yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 From df3f1aa3cac73b5906e36b446ee4cb6edd91af6a Mon Sep 17 00:00:00 2001 From: binary-husky Date: Fri, 30 Jun 2023 14:56:22 +0800 Subject: [PATCH 74/78] =?UTF-8?q?=E6=9B=B4=E6=AD=A3ChatGLM2=E7=9A=84?= 
=?UTF-8?q?=E9=BB=98=E8=AE=A4Token=E6=95=B0=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 65e1f4c..2144010 100644 --- a/main.py +++ b/main.py @@ -104,7 +104,7 @@ def main(): system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt) top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",) temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",) - max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",) + max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",) checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区") md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False) From a1092d8f924420d243e36dca22ccfd3037df821a Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sat, 1 Jul 2023 00:17:26 +0800 Subject: [PATCH 75/78] =?UTF-8?q?=E6=8F=90=E4=BE=9B=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=B8=85=E7=A9=BA=E8=BE=93=E5=85=A5=E6=A1=86=E7=9A=84=E9=80=89?= =?UTF-8?q?=E9=A1=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.py | 3 +++ main.py | 9 +++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/config.py b/config.py index 58e0e09..f187a0c 100644 --- a/config.py +++ b/config.py @@ -56,6 +56,9 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda" # 设置gradio的并行线程数(不需要修改) CONCURRENT_COUNT = 100 +# 是否在提交时自动清空输入框 +AUTO_CLEAR_TXT = False + # 加一个live2d装饰 ADD_WAIFU = False diff --git a/main.py b/main.py index 65e1f4c..f1b7f45 100644 --- a/main.py +++ 
b/main.py @@ -6,8 +6,8 @@ def main(): from request_llm.bridge_all import predict from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 - proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \ - get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS') + proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \ + get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT') # 如果WEB_PORT是-1, 则随机选取WEB端口 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT @@ -144,6 +144,11 @@ def main(): resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) clearBtn.click(lambda: ("",""), None, [txt, txt2]) clearBtn2.click(lambda: ("",""), None, [txt, txt2]) + if AUTO_CLEAR_TXT: + submitBtn.click(lambda: ("",""), None, [txt, txt2]) + submitBtn2.click(lambda: ("",""), None, [txt, txt2]) + txt.submit(lambda: ("",""), None, [txt, txt2]) + txt2.submit(lambda: ("",""), None, [txt, txt2]) # 基础功能区的回调函数注册 for k in functional: if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue From d7ac99f603bef9eeae26b22df4891d562cd829d4 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sat, 1 Jul 2023 01:46:43 +0800 Subject: [PATCH 76/78] =?UTF-8?q?=E6=9B=B4=E6=AD=A3=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E6=8F=90=E7=A4=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crazy_functions/crazy_functions_test.py | 2 +- crazy_functions/latex_utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/crazy_functions/crazy_functions_test.py 
b/crazy_functions/crazy_functions_test.py index 60b6b87..a10f3c2 100644 --- a/crazy_functions/crazy_functions_test.py +++ b/crazy_functions/crazy_functions_test.py @@ -195,7 +195,7 @@ def test_Latex(): # txt = r"https://arxiv.org/abs/2303.08774" # txt = r"https://arxiv.org/abs/2303.12712" # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" - txt = r"C:\Users\fuqingxu\Desktop\9" + txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误! for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py index eb65a8a..be5a367 100644 --- a/crazy_functions/latex_utils.py +++ b/crazy_functions/latex_utils.py @@ -203,6 +203,7 @@ def merge_tex_files_(project_foler, main_file, mode): c = fx.read() else: # e.g., \input{srcs/07_appendix} + assert os.path.exists(fp+'.tex'), f'即找不到{fp},也找不到{fp}.tex,Tex源文件缺失!' with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx: c = fx.read() c = merge_tex_files_(project_foler, c, mode) From 41c10f5688fe5e9993ec16723f1191b07798b9c0 Mon Sep 17 00:00:00 2001 From: qingxu fu <505030475@qq.com> Date: Sat, 1 Jul 2023 02:28:32 +0800 Subject: [PATCH 77/78] report image generation error in UI --- crazy_functions/图片生成.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crazy_functions/图片生成.py b/crazy_functions/图片生成.py index 5bf8bc4..1bf53f4 100644 --- a/crazy_functions/图片生成.py +++ b/crazy_functions/图片生成.py @@ -27,8 +27,10 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"): } response = requests.post(url, headers=headers, json=data, proxies=proxies) print(response.content) - image_url = json.loads(response.content.decode('utf8'))['data'][0]['url'] - + try: + image_url = json.loads(response.content.decode('utf8'))['data'][0]['url'] + except: + raise RuntimeError(response.content.decode()) # 文件保存到本地 r = requests.get(image_url, proxies=proxies) file_path = 
'gpt_log/image_gen/'
From 5f7ffef2385786f91a741dc41de223492fe7a66a Mon Sep 17 00:00:00 2001
From: w_xiaolizu
Date: Fri, 21 Apr 2023 17:09:49 +0800
Subject: [PATCH 78/78] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=9F=BA=E7=A1=80?=
 =?UTF-8?q?=E5=8A=9F=E8=83=BD=E5=88=A4=E7=A9=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crazy_functions/辅助回答.py | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 crazy_functions/辅助回答.py

diff --git a/crazy_functions/辅助回答.py b/crazy_functions/辅助回答.py
new file mode 100644
index 0000000..b635f88
--- /dev/null
+++ b/crazy_functions/辅助回答.py
@@ -0,0 +1,46 @@
+# encoding: utf-8
+# @Time : 2023/4/19
+# @Author : Spike
+# @Descr :
+from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file
+from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
+
+
+@CatchException
+def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """
+    Answer the input (or analyse the latest history entry) and then list three questions the user may ask next.
+
+    txt             text of the input box; when empty, history[-1] is used as the material instead
+    llm_kwargs      passed through to request_gpt_model_in_new_thread_with_ui_alive
+    plugin_kwargs   not used by this plugin
+    chatbot         conversation list shown in the UI; its last row is replaced by (shown prompt, answer)
+    history         conversation history; the shown prompt and the answer are appended to it
+    system_prompt   passed to the request helper as sys_prompt
+    web_port        not used by this plugin
+    """
+    if not txt and not history:
+        # Neither an input nor a previous answer exists; history[-1] below would raise IndexError.
+        chatbot.append(("猜你想问", "输入框和对话历史均为空, 请先输入问题或进行一轮对话后再使用本功能。"))
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    if txt:
+        show_say = txt
+        prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
+    else:
+        # Empty input: build the prompt from the most recent history entry.
+        prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
+        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
+    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+        inputs=prompt,
+        inputs_show_user=show_say,
+        llm_kwargs=llm_kwargs,
+        chatbot=chatbot,
+        history=history,
+        sys_prompt=system_prompt
+    )
+    # NOTE(review): presumably the helper has already added a row for this request - confirm.
+    chatbot[-1] = (show_say, gpt_say)
+    history.extend([show_say, gpt_say])
+    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
\ No newline at end of file