diff --git a/README.md b/README.md
index c69bfb5..7760260 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
1. 下载项目
```sh
-git clone https://github.com/binary-husky/.git
+git clone https://github.com/binary-husky/gpt_academic.git
cd gpt_academic
```
@@ -186,16 +186,19 @@ docker-compose up
2. 使用docker-compose运行。
请阅读docker-compose.yml后,按照其中的提示操作即可
-3. 如何使用反代URL/微软云AzureAPI。
+3. 如何使用反代URL
按照`config.py`中的说明配置API_URL_REDIRECT即可。
-4. 远程云服务器部署(需要云服务器知识与经验)。
+4. 微软云AzureAPI
+按照`config.py`中的说明配置即可(AZURE_ENDPOINT等四个配置)
+
+5. 远程云服务器部署(需要云服务器知识与经验)。
请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
-5. 使用WSL2(Windows Subsystem for Linux 子系统)。
+6. 使用WSL2(Windows Subsystem for Linux 子系统)。
请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
-6. 如何在二级网址(如`http://localhost/subpath`)下运行。
+7. 如何在二级网址(如`http://localhost/subpath`)下运行。
请访问[FastAPI运行说明](docs/WithFastapi.md)
---
diff --git a/config.py b/config.py
index 87e0ec9..b173862 100644
--- a/config.py
+++ b/config.py
@@ -1,6 +1,7 @@
# [step 1]>> 例如: API_KEY = "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" (此key无效)
API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2"
+
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
USE_PROXY = False
if USE_PROXY:
@@ -81,3 +82,10 @@ your bing cookies here
# 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md
SLACK_CLAUDE_BOT_ID = ''
SLACK_CLAUDE_USER_TOKEN = ''
+
+
+# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
+AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "填入api版本"
+AZURE_ENGINE = "填入ENGINE"
diff --git a/crazy_functional.py b/crazy_functional.py
index a1ba0c2..a724b97 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -40,57 +40,57 @@ def get_crazy_functions():
"AsButton":False,
"Function": HotReload(删除所有本地对话历史记录)
},
- # "[测试功能] 解析Jupyter Notebook文件": {
- # "Color": "stop",
- # "AsButton":False,
- # "Function": HotReload(解析ipynb文件),
- # "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
- # "ArgsReminder": "若输入0,则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
- # },
+ "[测试功能] 解析Jupyter Notebook文件": {
+ "Color": "stop",
+ "AsButton":False,
+ "Function": HotReload(解析ipynb文件),
+ "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+ "ArgsReminder": "若输入0,则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
+ },
"批量总结Word文档": {
"Color": "stop",
"Function": HotReload(总结word文档)
},
- # "解析整个C++项目头文件": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个C项目的头文件)
- # },
- # "解析整个C++项目(.cpp/.hpp/.c/.h)": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个C项目)
- # },
- # "解析整个Go项目": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个Golang项目)
- # },
- # "解析整个Rust项目": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个Rust项目)
- # },
- # "解析整个Java项目": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个Java项目)
- # },
- # "解析整个前端项目(js,ts,css等)": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个前端项目)
- # },
- # "解析整个Lua项目": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个Lua项目)
- # },
- # "解析整个CSharp项目": {
- # "Color": "stop", # 按钮颜色
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析一个CSharp项目)
- # },
+ "解析整个C++项目头文件": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个C项目的头文件)
+ },
+ "解析整个C++项目(.cpp/.hpp/.c/.h)": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个C项目)
+ },
+ "解析整个Go项目": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个Golang项目)
+ },
+ "解析整个Rust项目": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个Rust项目)
+ },
+ "解析整个Java项目": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个Java项目)
+ },
+ "解析整个前端项目(js,ts,css等)": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个前端项目)
+ },
+ "解析整个Lua项目": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个Lua项目)
+ },
+ "解析整个CSharp项目": {
+ "Color": "stop", # 按钮颜色
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析一个CSharp项目)
+ },
"读Tex论文写摘要": {
"Color": "stop", # 按钮颜色
"Function": HotReload(读文章写摘要)
@@ -108,19 +108,19 @@ def get_crazy_functions():
"保存当前的对话": {
"Function": HotReload(对话历史存档)
},
- # "[多线程Demo] 解析此项目本身(源码自译解)": {
- # "AsButton": False, # 加入下拉菜单中
- # "Function": HotReload(解析项目本身)
- # },
+ "[多线程Demo] 解析此项目本身(源码自译解)": {
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(解析项目本身)
+ },
# "[老旧的Demo] 把本项目源代码切换成全英文": {
# # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
# "AsButton": False, # 加入下拉菜单中
# "Function": HotReload(全项目切换英文)
# },
- # "[插件demo] 历史上的今天": {
- # # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
- # "Function": HotReload(高阶功能模板函数)
- # },
+ "[插件demo] 历史上的今天": {
+ # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
+ "Function": HotReload(高阶功能模板函数)
+ },
}
###################### 第二组插件 ###########################
@@ -135,35 +135,17 @@ def get_crazy_functions():
from crazy_functions.Latex全文翻译 import Latex中译英
from crazy_functions.Latex全文翻译 import Latex英译中
from crazy_functions.批量Markdown翻译 import Markdown中译英
- # from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
- from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
- "翻译任意PDF文档2MD": {
+ "批量翻译PDF文档(多线程)": {
"Color": "stop",
"AsButton": True, # 加入下拉菜单中
"Function": HotReload(批量翻译PDF文档)
},
- # "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
- # "Color": "stop",
- # "AsButton": True,
- # # "AdvancedArgs": True,
- # # "ArgsReminder": "",
- # "Function": HotReload(Latex英文纠错加PDF对比)
- # },
- "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
- "Color": "stop",
- "AsButton": True,
- # "AdvancedArgs": True,
- # "ArgsReminder": "",
- "Function": HotReload(Latex翻译中文并重新编译PDF)
+ "询问多个GPT模型": {
+ "Color": "stop", # 按钮颜色
+ "Function": HotReload(同时问询)
},
-
-
- # "询问多个GPT模型": {
- # "Color": "stop", # 按钮颜色
- # "Function": HotReload(同时问询)
- # },
"[测试功能] 批量总结PDF文档": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
@@ -253,33 +235,33 @@ def get_crazy_functions():
except:
print('Load function plugin failed')
- # try:
- # from crazy_functions.解析项目源代码 import 解析任意code项目
- # function_plugins.update({
- # "解析项目源代码(手动指定和筛选源代码文件类型)": {
- # "Color": "stop",
- # "AsButton": False,
- # "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
- # "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
- # "Function": HotReload(解析任意code项目)
- # },
- # })
- # except:
- # print('Load function plugin failed')
+ try:
+ from crazy_functions.解析项目源代码 import 解析任意code项目
+ function_plugins.update({
+ "解析项目源代码(手动指定和筛选源代码文件类型)": {
+ "Color": "stop",
+ "AsButton": False,
+ "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+ "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
+ "Function": HotReload(解析任意code项目)
+ },
+ })
+ except:
+ print('Load function plugin failed')
- # try:
- # from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
- # function_plugins.update({
- # "询问多个GPT模型(手动指定询问哪些模型)": {
- # "Color": "stop",
- # "AsButton": False,
- # "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
- # "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
- # "Function": HotReload(同时问询_指定模型)
- # },
- # })
- # except:
- # print('Load function plugin failed')
+ try:
+ from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
+ function_plugins.update({
+ "询问多个GPT模型(手动指定询问哪些模型)": {
+ "Color": "stop",
+ "AsButton": False,
+ "AdvancedArgs": True, # 调用时,唤起高级参数输入区(默认False)
+ "ArgsReminder": "支持任意数量的llm接口,用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
+ "Function": HotReload(同时问询_指定模型)
+ },
+ })
+ except:
+ print('Load function plugin failed')
try:
from crazy_functions.图片生成 import 图片生成
@@ -364,17 +346,54 @@ def get_crazy_functions():
print('Load function plugin failed')
try:
- from crazy_functions.虚空终端 import 终端
+ from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
function_plugins.update({
- "超级终端": {
+ "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
"Color": "stop",
"AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder": "",
- "Function": HotReload(终端)
+ "Function": HotReload(Latex英文纠错加PDF对比)
}
})
+ from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
+ function_plugins.update({
+ "Arixv翻译(输入arxivID) [需Latex]": {
+ "Color": "stop",
+ "AsButton": False,
+ "AdvancedArgs": True,
+ "ArgsReminder":
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+ "Function": HotReload(Latex翻译中文并重新编译PDF)
+ }
+ })
+ # function_plugins.update({
+ # "本地论文翻译(上传Latex压缩包) [需Latex]": {
+ # "Color": "stop",
+ # "AsButton": False,
+ # "AdvancedArgs": True,
+ # "ArgsReminder":
+ # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+ # "Function": HotReload(Latex翻译中文并重新编译PDF)
+ # }
+ # })
except:
print('Load function plugin failed')
+ # try:
+ # from crazy_functions.虚空终端 import 终端
+ # function_plugins.update({
+ # "超级终端": {
+ # "Color": "stop",
+ # "AsButton": False,
+ # # "AdvancedArgs": True,
+ # # "ArgsReminder": "",
+ # "Function": HotReload(终端)
+ # }
+ # })
+ # except:
+ # print('Load function plugin failed')
+
return function_plugins
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 70431ba..1886375 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -1,13 +1,13 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
+from functools import partial
import glob, os, requests, time
pj = os.path.join
-# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
-# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
-ARXIV_CACHE_DIR = os.getenv("Arxiv_Cache")
+ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
+
# =================================== 工具函数 ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
-def switch_prompt(pfg, mode):
+def switch_prompt(pfg, mode, more_requirement):
"""
Generate prompts and system prompts based on the mode for proofreading or translating.
Args:
@@ -26,7 +26,7 @@ def switch_prompt(pfg, mode):
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif mode == 'translate_zh':
- inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 +
+ inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the translated text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
@@ -80,7 +80,7 @@ def arxiv_download(chatbot, history, txt):
os.makedirs(translation_dir)
target_file = pj(translation_dir, 'translate_zh.pdf')
if os.path.exists(target_file):
- promote_file_to_downloadzone(target_file)
+ promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
return target_file
return False
def is_float(s):
@@ -89,8 +89,10 @@ def arxiv_download(chatbot, history, txt):
return True
except ValueError:
return False
- if ('.' in txt) and ('/' not in txt) and is_float(txt):
- txt = 'https://arxiv.org/abs/' + txt
+ if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
+ txt = 'https://arxiv.org/abs/' + txt.strip()
+ if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
+ txt = 'https://arxiv.org/abs/' + txt[:10]
if not txt.startswith('https://arxiv.org'):
return txt, None
@@ -106,6 +108,7 @@ def arxiv_download(chatbot, history, txt):
return msg, None
# <-------------- set format ------------->
arxiv_id = url_.split('/abs/')[-1]
+ if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
if cached_translation_pdf: return cached_translation_pdf, arxiv_id
@@ -178,7 +181,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread.tex'):
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+ yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
+ chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->
@@ -187,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- zip PDF ------------->
- zip_result(project_folder)
+ zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
return success
@@ -209,6 +214,10 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
"对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+ # <-------------- more requirements ------------->
+ if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+ more_req = plugin_kwargs.get("advanced_arg", "")
+ _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
@@ -256,21 +265,23 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
+ yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
+ chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
- success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh',
+ success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
# <-------------- zip PDF ------------->
- zip_result(project_folder)
+ zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index e743878..b4ff5e2 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -189,6 +189,7 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2211.16068" # ACE
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
txt = r"https://arxiv.org/abs/2002.09253"
+ txt = r"https://arxiv.org/abs/2306.07831"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@@ -217,6 +218,7 @@ def test_Latex():
# test_数学动画生成manim()
# test_Langchain知识库()
# test_Langchain知识库读取()
-test_Latex()
-input("程序完成,回车退出。")
-print("退出。")
\ No newline at end of file
+if __name__ == "__main__":
+ test_Latex()
+ input("程序完成,回车退出。")
+ print("退出。")
\ No newline at end of file
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 96301ff..a1b1493 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -698,3 +698,51 @@ def try_install_deps(deps):
for dep in deps:
import subprocess, sys
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
+
+
+class construct_html():
+ def __init__(self) -> None:
+ self.css = """
+.row {
+ display: flex;
+ flex-wrap: wrap;
+}
+
+.column {
+ flex: 1;
+ padding: 10px;
+}
+
+.table-header {
+ font-weight: bold;
+ border-bottom: 1px solid black;
+}
+
+.table-row {
+ border-bottom: 1px solid lightgray;
+}
+
+.table-cell {
+ padding: 5px;
+}
+ """
+ self.html_string = f'
翻译结果'
+
+
+ def add_row(self, a, b):
+ tmp = """
+
+
REPLACE_A
+
REPLACE_B
+
+ """
+ from toolbox import markdown_convertion
+ tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
+ tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
+ self.html_string += tmp
+
+
+ def save_file(self, file_name):
+ with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+ f.write(self.html_string.encode('utf-8', 'ignore').decode())
+
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index c380da0..48df10b 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -175,9 +175,8 @@ def merge_tex_files(project_foler, main_file, mode):
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
- if platform.system() != 'Windows':
- main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
- main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
+ main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
+ main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(main_file)
@@ -213,6 +212,8 @@ def fix_content(final_tex, node_string):
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
+ if "Traceback" in final_tex and "[Local Message]" in final_tex:
+ final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
@@ -404,7 +405,7 @@ class LatexPaperSplit():
def __init__(self) -> None:
self.nodes = None
self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
- "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \
+ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
# 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
@@ -498,7 +499,32 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
+def write_html(sp_file_contents, sp_file_result, chatbot):
+ # write html
+ try:
+ import copy
+ from .crazy_utils import construct_html
+ from toolbox import gen_time_str
+ ch = construct_html()
+ orig = ""
+ trans = ""
+ final = []
+ for c,r in zip(sp_file_contents, sp_file_result):
+ final.append(c)
+ final.append(r)
+ for i, k in enumerate(final):
+ if i%2==0:
+ orig = k
+ if i%2==1:
+ trans = k
+ ch.add_row(a=orig, b=trans)
+ create_report_file_name = f"{gen_time_str()}.trans.html"
+ ch.save_file(create_report_file_name)
+ promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
+ except:
+ from toolbox import trimmed_format_exc
+ print('writing html result failed:', trimmed_format_exc())
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
import time, os, re
@@ -575,6 +601,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl'))
+ write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
@@ -624,7 +651,7 @@ def compile_latex_with_timeout(command, timeout=60):
return False
return True
-def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
+def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
import os, time
current_dir = os.getcwd()
n_fix = 1
@@ -635,6 +662,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
while True:
import os
+
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
@@ -656,15 +684,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
- print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
- ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+ if mode!='translate_zh':
+ yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
+ print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+ ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
# <--------------------->
os.chdir(current_dir)
@@ -685,9 +714,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
- promote_file_to_downloadzone(result_pdf, chatbot=chatbot)
- # 我重新指定了参数传入。
- # promote_file_to_downloadzone(file=result_pdf, rename_file=None, chatbot=chatbot)
+ promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
return True # 成功啦
else:
if n_fix>=max_try: break
diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py
index c638d1b..fed0f8f 100644
--- a/crazy_functions/对话历史存档.py
+++ b/crazy_functions/对话历史存档.py
@@ -1,4 +1,4 @@
-from toolbox import CatchException, update_ui
+from toolbox import CatchException, update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import re
@@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
for h in history:
f.write("\n>>>" + h)
f.write('')
- res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
- print(res)
- return res
+ promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
+ return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
def gen_file_preview(file_name):
try:
diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex
new file mode 100644
index 0000000..5ff9bb8
--- /dev/null
+++ b/docs/GithubAction+NoLocal+Latex
@@ -0,0 +1,25 @@
+# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
+# - 1 修改 `config.py`
+# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex .
+# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
+
+FROM fuqingxu/python311_texlive_ctex:latest
+
+# 指定路径
+WORKDIR /gpt
+
+RUN pip3 install gradio openai numpy arxiv rich
+RUN pip3 install colorama Markdown pygments pymupdf
+
+# 装载项目文件
+COPY . .
+
+
+# 安装依赖
+RUN pip3 install -r requirements.txt
+
+# 可选步骤,用于预热模块
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# 启动
+CMD ["python3", "-u", "main.py"]
diff --git a/docs/use_azure.md b/docs/use_azure.md
new file mode 100644
index 0000000..626b132
--- /dev/null
+++ b/docs/use_azure.md
@@ -0,0 +1,143 @@
+# 通过微软Azure云服务申请 Openai API
+
+由于Openai和微软的关系,现在是可以通过微软的Azure云计算服务直接访问openai的api,免去了注册和网络的问题。
+
+快速入门的官方文档的链接是:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+# 申请API
+
+按文档中的“先决条件”的介绍,除了编程的环境以外,还需要以下三个条件:
+
+1. Azure账号并创建订阅
+
+2. 为订阅添加Azure OpenAI 服务
+
+3. 部署模型
+
+## Azure账号并创建订阅
+
+### Azure账号
+
+创建Azure的账号时最好是有微软的账号,这样似乎更容易获得免费额度(第一个月的200美元,实测了一下,如果用一个刚注册的微软账号登录Azure的话,并没有这一个月的免费额度)。
+
+创建Azure账号的网址是:[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/)
+
+
+
+打开网页后,点击 “免费开始使用” 会跳转到登录或注册页面,如果有微软的账户,直接登录即可,如果没有微软账户,那就需要到微软的网页再另行注册一个。
+
+注意,Azure的页面和政策时不时会变化,以实际最新显示的为准就好。
+
+### 创建订阅
+
+注册好Azure后便可进入主页:
+
+
+
+首先需要在订阅里进行添加操作,点开后即可进入订阅的页面:
+
+
+
+第一次进来应该是空的,点添加即可创建新的订阅(可以是“免费”或者“即付即用”的订阅),其中订阅ID是后面申请Azure OpenAI需要使用的。
+
+## 为订阅添加Azure OpenAI服务
+
+之后回到首页,点Azure OpenAI即可进入OpenAI服务的页面(如果不显示的话,则在首页上方的搜索栏里搜索“openai”即可)。
+
+
+
+不过现在这个服务还不能用。在使用前,还需要在这个网址申请一下:
+
+[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu)
+
+这里有二十来个问题,按照要求和自己的实际情况填写即可。
+
+其中需要注意的是
+
+1. 千万记得填对"订阅ID"
+
+2. 需要填一个公司邮箱(可以不是注册用的邮箱)和公司网址
+
+之后,再回到上面那个页面,点创建,就会进入创建页面了:
+
+
+
+需要填入“资源组”和“名称”,按照自己的需要填入即可。
+
+完成后,在主页的“资源”里就可以看到刚才创建的“资源”了,点击进入后,就可以进行最后的部署了。
+
+
+
+## 部署模型
+
+进入资源页面后,在部署模型前,可以先点击“开发”,把密钥和终结点记下来。
+
+
+
+之后,就可以去部署模型了,点击“部署”即可,会跳转到 Azure OpenAI Studio 进行下面的操作:
+
+
+
+进入 Azure OpenAI Studio 后,点击新建部署,会弹出如下对话框:
+
+
+
+在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。
+
+
+
+这个部署名需要记下来。
+
+到现在为止,申请操作就完成了,需要记下来的有下面几个东西:
+
+● 密钥(1或2都可以)
+
+● 终结点
+
+● 部署名(不是模型名)
+
+# API的使用
+
+接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+和openai自己的api调用有点类似,都需要安装openai库,不同的是调用方式
+
+```
+import openai
+openai.api_type = "azure" #固定格式,无需修改
+openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点”
+openai.api_version = "2023-05-15" #固定格式,无需修改
+openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2”
+
+response = openai.ChatCompletion.create(
+ engine="gpt-35-turbo", #这里填入的不是模型名,是部署名
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
+ {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
+ {"role": "user", "content": "Do other Azure Cognitive Services support this too?"}
+ ]
+)
+
+print(response)
+print(response['choices'][0]['message']['content'])
+
+```
+
+需要注意的是:
+
+1. engine那里填入的是部署名,不是模型名
+
+2. 通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同,不需要 decode,已经是解析好的 json 了,直接根据键值读取即可。
+
+更细节的使用方法,详见官方API文档。
+
+# 关于费用
+
+Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期),费用如下:
+
+
+
+具体可以看这个网址 :[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable)
+
+并非网上说的什么“一年白嫖”,但注册方法以及网络问题都比直接使用openai的api要简单一些。
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index a27407c..8656ee5 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -16,6 +16,9 @@ from toolbox import get_conf, trimmed_format_exc
from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
from .bridge_chatgpt import predict as chatgpt_ui
+from .bridge_azure_test import predict_no_ui_long_connection as azure_noui
+from .bridge_azure_test import predict as azure_ui
+
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui
@@ -102,6 +105,16 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
+ # azure openai
+ "azure-gpt35":{
+ "fn_with_ui": azure_ui,
+ "fn_without_ui": azure_noui,
+ "endpoint": get_conf("AZURE_ENDPOINT"),
+ "max_token": 4096,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+
# api_2d
"api2d-gpt-3.5-turbo": {
"fn_with_ui": chatgpt_ui,
diff --git a/request_llm/bridge_azure_test.py b/request_llm/bridge_azure_test.py
new file mode 100644
index 0000000..edc68f7
--- /dev/null
+++ b/request_llm/bridge_azure_test.py
@@ -0,0 +1,241 @@
+"""
+ 该文件中主要包含三个函数
+
+ 不具备多线程能力的函数:
+ 1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
+
+ 具备多线程调用能力的函数
+ 2. predict_no_ui:高级实验性功能模块调用,不会实时显示在界面上,参数简单,可以多线程并行,方便实现复杂的功能逻辑
+ 3. predict_no_ui_long_connection:在实验过程中发现调用predict_no_ui处理长文档时,和openai的连接容易断掉,这个函数用stream的方式解决这个问题,同样支持多线程
+"""
+
+import logging
+import traceback
+import importlib
+import openai
+import time
+
+
+# 读取config.py文件中关于AZURE OPENAI API的信息
+from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc
+TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \
+ get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY")
+
+
def get_full_error(chunk, stream_response):
    """
    Drain the rest of *stream_response* and append every remaining item to *chunk*.

    Used after an error frame is detected so the complete error payload returned
    by the (Azure) OpenAI API can be surfaced to the caller.

    Parameters:
        chunk: the partial payload received so far (str or bytes); extended with `+=`
        stream_response: iterator yielding the remaining stream items

    Returns:
        *chunk* concatenated with everything left in the stream.
    """
    while True:
        try:
            chunk += next(stream_response)
        except StopIteration:
            # Stream exhausted: we now hold the full error text.
            break
        except Exception:
            # Network/decode failure while draining: return what we have.
            # (Was a bare `except`, which also swallowed KeyboardInterrupt.)
            break
    return chunk
+
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    Send a request to the Azure OpenAI API and stream the reply into the WebUI.

    Used for the basic chat feature; full interactive behaviour, not thread-safe.
    inputs is the text of the current query.
    top_p, temperature are the model's internal tuning parameters (in llm_kwargs).
    history is the list of previous turns (too much text in inputs or history
    will overflow the model's token limit).
    chatbot is the dialogue list shown in the WebUI; mutate it and yield
    (via update_ui) to refresh the on-screen conversation.
    additional_fn says which core-function button was clicked, see functional.py.
    """
    print(llm_kwargs["llm_model"])

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # hot-reload the prompt definitions
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # apply the optional pre-processing hook
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # refresh the UI


    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream)

    history.append(inputs); history.append("")

    retry = 0
    while True:
        try:
            # Re-point the openai module at Azure on every attempt, then fire the request.
            openai.api_type = "azure"
            openai.api_version = AZURE_API_VERSION
            openai.api_base = AZURE_ENDPOINT
            openai.api_key = AZURE_API_KEY
            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break

        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], "获取response失败,重试中。。。"))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # refresh the UI
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""
    is_head_of_the_stream = True
    if stream:

        stream_response = response

        while True:
            try:
                chunk = next(stream_response)

            except StopIteration:
                # NOTE(review): StopIteration is the normal end-of-stream signal, yet it is
                # reported to the user here as a remote error - confirm this is intended.
                from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
                chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}")
                yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # refresh the UI
                return

            if is_head_of_the_stream and (r'"object":"error"' not in chunk):
                # the first frame of the stream carries no content
                is_head_of_the_stream = False; continue

            if chunk:
                #print(chunk)
                try:
                    if "delta" in chunk["choices"][0]:
                        if chunk["choices"][0]["finish_reason"] == "stop":
                            logging.info(f'[response] {gpt_replying_buffer}')
                            break
                        status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}"
                        gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"]

                        # Mirror the accumulated reply into history and the visible chatbot.
                        history[-1] = gpt_replying_buffer
                        chatbot[-1] = (history[-2], history[-1])
                        yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # refresh the UI

                except Exception as e:
                    traceback.print_exc()
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # refresh the UI
                    chunk = get_full_error(chunk, stream_response)

                    error_msg = chunk
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # refresh the UI
                    return
+
+
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    Send a request to the Azure OpenAI API and wait for the full reply in one
    call, without displaying intermediate output. Internally it still streams,
    to keep the connection from being dropped mid-transfer. Thread-safe.
    inputs:
        the text of the current query
    sys_prompt:
        silent system prompt
    llm_kwargs:
        the model's internal tuning parameters
    history:
        list of previous dialogue turns
    observe_window = None:
        passes partial output across threads, mostly for visual effect; leave
        empty if unused. observe_window[0]: output buffer. observe_window[1]: watchdog timestamp
    """
    watch_dog_patience = 5 # watchdog patience: 5 seconds is enough
    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:

        try:
            # Re-point the openai module at Azure on every attempt, then fire the request.
            openai.api_type = "azure"
            openai.api_version = AZURE_API_VERSION
            openai.api_base = AZURE_ENDPOINT
            openai.api_key = AZURE_API_KEY
            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break

        except:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')


    stream_response = response
    result = ''
    while True:
        try: chunk = next(stream_response)
        except StopIteration:
            break
        except:
            chunk = next(stream_response) # failed once; retry once more, then give up
        # NOTE(review): openai-python yields parsed objects for streamed chat
        # completions, yet the checks below (`startswith('data:')`,
        # `'data: [DONE]' in chunk`) treat `chunk` as a raw SSE string, and
        # `chunk["delta"]` / `chunk['finish_reason']` skip the usual
        # `chunk["choices"][0]` level - verify against the installed openai
        # library version before relying on this code path.
        if len(chunk)==0: continue
        if not chunk.startswith('data:'):
            error_msg = get_full_error(chunk, stream_response)
            if "reduce the length" in error_msg:
                raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg)
            else:
                raise RuntimeError("AZURE OPENAI API拒绝了请求:" + error_msg)
        if ('data: [DONE]' in chunk): break

        delta = chunk["delta"]
        if len(delta) == 0: break
        if "role" in delta: continue
        if "content" in delta:
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
            if observe_window is not None:
                # observation window: publish the data received so far
                if len(observe_window) >= 1: observe_window[0] += delta["content"]
                # watchdog: abort if it has not been fed within the deadline
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
        else: raise RuntimeError("意外Json结构:"+delta)
    if chunk['finish_reason'] == 'length':
        raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
    return result
+
+
def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    Assemble the request payload for the Azure OpenAI chat-completion API.

    Converts the flat `history` list (alternating user/assistant turns) plus the
    current `inputs` into the `messages` structure the API expects, and bundles
    the model selection and tuning parameters alongside it.
    """
    # Start from the silent system prompt, then replay past turns in pairs.
    messages = [{"role": "system", "content": system_prompt}]
    n_pairs = len(history) // 2
    for idx in range(0, 2 * n_pairs, 2):
        user_turn = {"role": "user", "content": history[idx]}
        assistant_turn = {"role": "assistant", "content": history[idx + 1]}
        if user_turn["content"] != "":
            # Only keep complete exchanges: drop pairs whose answer is empty.
            if assistant_turn["content"] == "":
                continue
            messages.append(user_turn)
            messages.append(assistant_turn)
        else:
            # Empty question: fold the answer into the previous message instead.
            messages[-1]['content'] = assistant_turn['content']

    # Finally append the question being asked right now.
    messages.append({"role": "user", "content": inputs})

    payload = {
        "model": llm_kwargs['llm_model'],
        "messages": messages,
        "temperature": llm_kwargs['temperature'],  # e.g. 1.0
        "top_p": llm_kwargs['top_p'],  # e.g. 1.0
        "n": 1,
        "stream": stream,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "engine": AZURE_ENGINE
    }
    try:
        print(f" {llm_kwargs['llm_model']} : {n_pairs} : {inputs[:100]} ..........")
    except:
        print('输入中可能存在乱码。')
    return payload
+
+
diff --git a/toolbox.py b/toolbox.py
index ac49afc..fb6aa9f 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -6,6 +6,7 @@ import re
import os
from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache
+pj = os.path.join
"""
========================================================================
@@ -399,7 +400,7 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted rar archive to {}".format(dest_dir))
except:
print("Rar format requires additional dependencies to install")
- return '\n\n需要安装pip install rarfile来解压rar文件'
+ return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件'
# 第三方库,需要预先pip install py7zr
elif file_extension == '.7z':
@@ -410,7 +411,7 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted 7z archive to {}".format(dest_dir))
except:
print("7z format requires additional dependencies to install")
- return '\n\n需要安装pip install py7zr来解压7z文件'
+ return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件'
else:
return ''
return ''
@@ -444,10 +445,12 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
import shutil
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = os.path.join(f'./gpt_log/', rename_file)
- if os.path.exists(new_path): os.remove(new_path)
- shutil.copyfile(file, new_path)
+ if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
+ if not os.path.exists(new_path): shutil.copyfile(file, new_path)
if chatbot:
- chatbot._cookies.update({'file_to_promote': [new_path]})
+ if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote']
+ else: current = []
+ chatbot._cookies.update({'file_to_promote': [new_path] + current})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
"""
@@ -802,7 +805,8 @@ def zip_result(folder):
import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
zip_folder(folder, './gpt_log/', f'{t}-result.zip')
-
+ return pj('./gpt_log/', f'{t}-result.zip')
+
def gen_time_str():
import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())