合并master

This commit is contained in:
w_xiaolizu
2023-06-04 20:26:04 +08:00
14 changed files with 1325 additions and 103 deletions

View File

@ -1,8 +1,8 @@
> **Note** > **Note**
> >
> 5月27日对gradio依赖进行了较大的修复和调整,fork并解决了官方Gradio的一系列bug。但如果27日当天进行了更新可能会导致代码报错依赖缺失卡在loading界面等请及时更新到**最新版代码**并重新安装pip依赖即可。若给您带来困扰还请谅解。安装依赖时请严格选择requirements.txt中**指定的版本** > 2023.5.27 对Gradio依赖进行了调整Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时请严格选择`requirements.txt`中**指定的版本**
> >
> `pip install -r requirements.txt -i https://pypi.org/simple` > `pip install -r requirements.txt`
> >
# <img src="docs/logo.png" width="40" > GPT 学术优化 (GPT Academic) # <img src="docs/logo.png" width="40" > GPT 学术优化 (GPT Academic)
@ -175,21 +175,26 @@ docker-compose up
## 安装-方法3其他部署姿势 ## 安装-方法3其他部署姿势
1. 一键运行脚本。
完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本,
不建议电脑上已有python的用户采用此方法在此基础上安装插件的依赖很麻烦
脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。
1. 如何使用反代URL/微软云AzureAPI 2. 使用docker-compose运行。
请阅读docker-compose.yml后按照其中的提示操作即可
3. 如何使用反代URL/微软云AzureAPI。
按照`config.py`中的说明配置API_URL_REDIRECT即可。 按照`config.py`中的说明配置API_URL_REDIRECT即可。
2. 远程云服务器部署(需要云服务器知识与经验) 4. 远程云服务器部署(需要云服务器知识与经验)
请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97) 请访问[部署wiki-1](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
3. 使用WSL2Windows Subsystem for Linux 子系统) 5. 使用WSL2Windows Subsystem for Linux 子系统)
请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2) 请访问[部署wiki-2](https://github.com/binary-husky/chatgpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
4. 如何在二级网址(如`http://localhost/subpath`)下运行 6. 如何在二级网址(如`http://localhost/subpath`)下运行
请访问[FastAPI运行说明](docs/WithFastapi.md) 请访问[FastAPI运行说明](docs/WithFastapi.md)
5. 使用docker-compose运行
请阅读docker-compose.yml后按照其中的提示操作即可
--- ---
# Advanced Usage # Advanced Usage
## 自定义新的便捷按钮 / 自定义函数插件 ## 自定义新的便捷按钮 / 自定义函数插件
@ -327,4 +332,5 @@ https://github.com/kaixindelele/ChatPaper
# 更多: # 更多:
https://github.com/gradio-app/gradio https://github.com/gradio-app/gradio
https://github.com/fghrsh/live2d_demo https://github.com/fghrsh/live2d_demo
https://github.com/oobabooga/one-click-installers
``` ```

View File

@ -34,58 +34,28 @@ def print亮紫(*kw,**kargs):
def print亮靛(*kw,**kargs): def print亮靛(*kw,**kargs):
print("\033[1;36m",*kw,"\033[0m",**kargs) print("\033[1;36m",*kw,"\033[0m",**kargs)
# Do you like the elegance of Chinese characters?
def sprint红(*kw):
def print亮红(*kw,**kargs): return "\033[0;31m"+' '.join(kw)+"\033[0m"
print("\033[1;31m",*kw,"\033[0m",**kargs) def sprint绿(*kw):
def print亮绿(*kw,**kargs): return "\033[0;32m"+' '.join(kw)+"\033[0m"
print("\033[1;32m",*kw,"\033[0m",**kargs) def sprint(*kw):
def print亮黄(*kw,**kargs): return "\033[0;33m"+' '.join(kw)+"\033[0m"
print("\033[1;33m",*kw,"\033[0m",**kargs) def sprint(*kw):
def print亮蓝(*kw,**kargs): return "\033[0;34m"+' '.join(kw)+"\033[0m"
print("\033[1;34m",*kw,"\033[0m",**kargs) def sprint(*kw):
def print亮紫(*kw,**kargs): return "\033[0;35m"+' '.join(kw)+"\033[0m"
print("\033[1;35m",*kw,"\033[0m",**kargs) def sprint(*kw):
def print亮靛(*kw,**kargs): return "\033[0;36m"+' '.join(kw)+"\033[0m"
print("\033[1;36m",*kw,"\033[0m",**kargs) def sprint亮红(*kw):
return "\033[1;31m"+' '.join(kw)+"\033[0m"
print_red = print红 def sprint亮绿(*kw):
print_green = print绿 return "\033[1;32m"+' '.join(kw)+"\033[0m"
print_yellow = print黄 def sprint亮黄(*kw):
print_blue = print蓝 return "\033[1;33m"+' '.join(kw)+"\033[0m"
print_purple = print紫 def sprint亮蓝(*kw):
print_indigo = print靛 return "\033[1;34m"+' '.join(kw)+"\033[0m"
def sprint亮紫(*kw):
print_bold_red = print亮红 return "\033[1;35m"+' '.join(kw)+"\033[0m"
print_bold_green = print亮绿 def sprint亮靛(*kw):
print_bold_yellow = print亮黄 return "\033[1;36m"+' '.join(kw)+"\033[0m"
print_bold_blue = print亮蓝
print_bold_purple = print亮紫
print_bold_indigo = print亮靛
if not stdout.isatty():
# redirection, avoid a fucked up log file
print红 = print
print绿 = print
print黄 = print
print蓝 = print
print紫 = print
print靛 = print
print亮红 = print
print亮绿 = print
print亮黄 = print
print亮蓝 = print
print亮紫 = print
print亮靛 = print
print_red = print
print_green = print
print_yellow = print
print_blue = print
print_purple = print
print_indigo = print
print_bold_red = print
print_bold_green = print
print_bold_yellow = print
print_bold_blue = print
print_bold_purple = print
print_bold_indigo = print

View File

@ -138,7 +138,7 @@ def get_crazy_functions():
###################### 第二组插件 ########################### ###################### 第二组插件 ###########################
# [第二组插件]: 经过充分测试 # [第二组插件]: 经过充分测试
from crazy_functions.批量总结PDF文档 import 批量总结PDF文档 from crazy_functions.批量总结PDF文档 import 批量总结PDF文档
from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer # from crazy_functions.批量总结PDF文档pdfminer import 批量总结PDF文档pdfminer
from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档 from crazy_functions.批量翻译PDF文档_多线程 import 批量翻译PDF文档
from crazy_functions.谷歌检索小助手 import 谷歌检索小助手 from crazy_functions.谷歌检索小助手 import 谷歌检索小助手
from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入 from crazy_functions.理解PDF文档内容 import 理解PDF文档内容标准文件输入
@ -164,17 +164,16 @@ def get_crazy_functions():
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Function": HotReload(批量总结PDF文档) "Function": HotReload(批量总结PDF文档)
}, },
"[测试功能] 批量总结PDF文档pdfminer": { # "[测试功能] 批量总结PDF文档pdfminer": {
"Color": "stop", # "Color": "stop",
"AsButton": False, # 加入下拉菜单中 # "AsButton": False, # 加入下拉菜单中
"Function": HotReload(批量总结PDF文档pdfminer) # "Function": HotReload(批量总结PDF文档pdfminer)
}, # },
"谷歌学术检索助手输入谷歌学术搜索页url": { "谷歌学术检索助手输入谷歌学术搜索页url": {
"Color": "stop", "Color": "stop",
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
"Function": HotReload(谷歌检索小助手) "Function": HotReload(谷歌检索小助手)
}, },
"理解PDF文档内容 模仿ChatPDF": { "理解PDF文档内容 模仿ChatPDF": {
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Color": "stop", "Color": "stop",
@ -193,7 +192,7 @@ def get_crazy_functions():
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
"Function": HotReload(Latex英文纠错) "Function": HotReload(Latex英文纠错)
}, },
"[测试功能] 中文Latex项目全文润色输入路径或上传压缩包": { "中文Latex项目全文润色输入路径或上传压缩包": {
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
"Color": "stop", "Color": "stop",
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
@ -222,15 +221,7 @@ def get_crazy_functions():
}) })
###################### 第三组插件 ########################### ###################### 第三组插件 ###########################
# [第三组插件]: 尚未充分测试的函数插件,放在这里 # [第三组插件]: 尚未充分测试的函数插件
from crazy_functions.下载arxiv论文翻译摘要 import 下载arxiv论文并翻译摘要
function_plugins.update({
"一键下载arxiv论文并翻译摘要先在input输入编号如1812.10695": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(下载arxiv论文并翻译摘要)
}
})
from crazy_functions.联网的ChatGPT import 连接网络回答问题 from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({ function_plugins.update({
@ -307,5 +298,56 @@ def get_crazy_functions():
except: except:
print('Load function plugin failed') print('Load function plugin failed')
try:
from crazy_functions.Langchain知识库 import 知识库问答
function_plugins.update({
"[功能尚不稳定] 构建知识库(请先上传文件素材)": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder": "待注入的知识库名称id, 默认为default",
"Function": HotReload(知识库问答)
}
})
except:
print('Load function plugin failed')
try:
from crazy_functions.Langchain知识库 import 读取知识库作答
function_plugins.update({
"[功能尚不稳定] 知识库问答": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder": "待提取的知识库名称id, 默认为default, 您需要首先调用构建知识库",
"Function": HotReload(读取知识库作答)
}
})
except:
print('Load function plugin failed')
try:
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
function_plugins.update({
"[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
"Color": "stop",
"AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(Latex英文纠错加PDF对比)
}
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
"[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
"Color": "stop",
"AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
except:
print('Load function plugin failed')
###################### 第n组插件 ########################### ###################### 第n组插件 ###########################
return function_plugins return function_plugins

View File

@ -0,0 +1,98 @@
from toolbox import CatchException, update_ui, ProxyNetworkActivate
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
@CatchException
def 知识库问答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数, 如温度和top_p等, 一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append(("这是什么功能?", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# resolve deps
try:
from zh_langchain import construct_vector_store
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from .crazy_utils import knowledge_archive_interface
except Exception as e:
chatbot.append(
["依赖不足",
"导入依赖失败。正在尝试自动安装,请查看终端的输出或耐心等待..."]
)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
from .crazy_utils import try_install_deps
try_install_deps(['zh_langchain==0.2.0'])
# < --------------------读取参数--------------- >
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
# < --------------------读取文件--------------- >
file_manifest = []
spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
for sp in spl:
_, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
file_manifest += file_manifest_tmp
if len(file_manifest) == 0:
chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# < -------------------预热文本向量化模组--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
print('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate(): # 临时地激活代理网络
HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
# < -------------------构建知识库--------------- >
chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
print('Establishing knowledge archive ...')
with ProxyNetworkActivate(): # 临时地激活代理网络
kai = knowledge_archive_interface()
kai.feed_archive(file_manifest=file_manifest, id=kai_id)
kai_files = kai.get_loaded_file()
kai_files = '<br/>'.join(kai_files)
# chatbot.append(['知识库构建成功', "正在将知识库存储至cookie中"])
# yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# chatbot._cookies['langchain_plugin_embedding'] = kai.get_current_archive_id()
# chatbot._cookies['lock_plugin'] = 'crazy_functions.Langchain知识库->读取知识库作答'
# chatbot.append(['完成', "“根据知识库作答”函数插件已经接管问答系统, 提问吧! 但注意, 您接下来不能再使用其他插件了,刷新页面即可以退出知识库问答模式。"])
chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
# < ------------------- --------------- >
from .crazy_utils import knowledge_archive_interface
kai = knowledge_archive_interface()
if 'langchain_plugin_embedding' in chatbot._cookies:
resp, prompt = kai.answer_with_archive_by_id(txt, chatbot._cookies['langchain_plugin_embedding'])
else:
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
kai_id = plugin_kwargs.get("advanced_arg", 'default')
resp, prompt = kai.answer_with_archive_by_id(txt, kai_id)
chatbot.append((txt, '[Local Message] ' + prompt))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt=system_prompt
)
history.extend((prompt, gpt_say))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新

View File

@ -238,3 +238,6 @@ def Latex英文纠错(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_p
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return return
yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread') yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en', mode='proofread')

View File

@ -0,0 +1,263 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
import glob, os, requests, time
pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# =================================== 工具函数 ===============================================
沙雕GPT啊别犯这些低级翻译错误 = 'You must to translate "agent" to "智能体". '
def switch_prompt(pfg, mode):
"""
Generate prompts and system prompts based on the mode for proofreading or translating.
Args:
- pfg: Proofreader or Translator instance.
- mode: A string specifying the mode, either 'proofread' or 'translate_zh'.
Returns:
- inputs_array: A list of strings containing prompts for users to respond to.
- sys_prompt_array: A list of strings containing prompts for system prompts.
"""
n_split = len(pfg.sp_file_contents)
if mode == 'proofread':
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif mode == 'translate_zh':
inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese." + 沙雕GPT啊别犯这些低级翻译错误 +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the translated text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional translator." for _ in range(n_split)]
else:
assert False, "未知指令"
return inputs_array, sys_prompt_array
def desend_to_extracted_folder_if_exist(project_folder):
"""
Descend into the extracted folder if it exists, otherwise return the original folder.
Args:
- project_folder: A string specifying the folder path.
Returns:
- A string specifying the path to the extracted folder, or the original folder if there is no extracted folder.
"""
maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
if len(maybe_dir) == 0: return project_folder
if maybe_dir[0].endswith('.extract'): return maybe_dir[0]
return project_folder
def move_project(project_folder, arxiv_id=None):
"""
Create a new work folder and copy the project folder to it.
Args:
- project_folder: A string specifying the folder path of the project.
Returns:
- A string specifying the path to the new work folder.
"""
import shutil, time
time.sleep(2) # avoid time string conflict
if arxiv_id is not None:
new_workfolder = pj(ARXIV_CACHE_DIR, arxiv_id, 'workfolder')
else:
new_workfolder = f'gpt_log/{gen_time_str()}'
try: shutil.rmtree(new_workfolder)
except: pass
shutil.copytree(src=project_folder, dst=new_workfolder)
return new_workfolder
def arxiv_download(chatbot, history, txt):
def check_cached_translation_pdf(arxiv_id):
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'translation')
if not os.path.exists(translation_dir):
os.makedirs(translation_dir)
target_file = pj(translation_dir, 'translate_zh.pdf')
if os.path.exists(target_file):
promote_file_to_downloadzone(target_file)
return target_file
return False
if not txt.startswith('https://arxiv.org'):
return txt, None
# <-------------- inspect format ------------->
chatbot.append([f"检测到arxiv文档连接", '尝试下载 ...'])
yield from update_ui(chatbot=chatbot, history=history)
time.sleep(1) # 刷新界面
url_ = txt # https://arxiv.org/abs/1707.06690
if not txt.startswith('https://arxiv.org/abs/'):
msg = f"解析arxiv网址失败, 期望格式例如: https://arxiv.org/abs/1707.06690。实际得到格式: {url_}"
yield from update_ui_lastest_msg(msg, chatbot=chatbot, history=history) # 刷新界面
return msg, None
# <-------------- set format ------------->
arxiv_id = url_.split('/abs/')[-1]
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
if cached_translation_pdf: return cached_translation_pdf, arxiv_id
url_tar = url_.replace('/abs/', '/e-print/')
translation_dir = pj(ARXIV_CACHE_DIR, arxiv_id, 'e-print')
extract_dst = pj(ARXIV_CACHE_DIR, arxiv_id, 'extract')
os.makedirs(translation_dir, exist_ok=True)
# <-------------- download arxiv source file ------------->
dst = pj(translation_dir, arxiv_id+'.tar')
if os.path.exists(dst):
yield from update_ui_lastest_msg("调用缓存", chatbot=chatbot, history=history) # 刷新界面
else:
yield from update_ui_lastest_msg("开始下载", chatbot=chatbot, history=history) # 刷新界面
proxies, = get_conf('proxies')
r = requests.get(url_tar, proxies=proxies)
with open(dst, 'wb+') as f:
f.write(r.content)
# <-------------- extract file ------------->
yield from update_ui_lastest_msg("下载完成", chatbot=chatbot, history=history) # 刷新界面
from toolbox import extract_archive
extract_archive(file_path=dst, dest_dir=extract_dst)
return extract_dst, arxiv_id
# ========================================= 插件主程序1 =====================================================
@CatchException
def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
# <-------------- information about this plugin ------------->
chatbot.append([ "函数插件功能?",
"对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4其他模型转化效果未知。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- check deps ------------->
try:
import glob, os, time
os.system(f'pdflatex -version')
from .latex_utils import Latex精细分解与转化, 编译Latex差别
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- clear history and read input ------------->
history = []
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- if is a zip/tar file ------------->
project_folder = desend_to_extracted_folder_if_exist(project_folder)
# <-------------- move latex project away from temp folder ------------->
project_folder = move_project(project_folder, arxiv_id=None)
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->
success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
# <-------------- zip PDF ------------->
zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
# <-------------- we are done ------------->
return success
# ========================================= 插件主程序2 =====================================================
@CatchException
def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
# <-------------- information about this plugin ------------->
chatbot.append([
"函数插件功能?",
"对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4其他模型转化效果未知。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- check deps ------------->
try:
import glob, os, time
os.system(f'pdflatex -version')
from .latex_utils import Latex精细分解与转化, 编译Latex差别
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- clear history and read input ------------->
history = []
txt, arxiv_id = yield from arxiv_download(chatbot, history, txt)
if txt.endswith('.pdf'):
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"发现已经存在翻译好的PDF文档")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# <-------------- if is a zip/tar file ------------->
project_folder = desend_to_extracted_folder_if_exist(project_folder)
# <-------------- move latex project away from temp folder ------------->
project_folder = move_project(project_folder, arxiv_id)
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->
success = yield from 编译Latex差别(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
# <-------------- zip PDF ------------->
zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
# <-------------- we are done ------------->
return success

View File

@ -3,6 +3,8 @@
这个文件用于函数插件的单元测试 这个文件用于函数插件的单元测试
运行方法 python crazy_functions/crazy_functions_test.py 运行方法 python crazy_functions/crazy_functions_test.py
""" """
# ==============================================================================================================================
def validate_path(): def validate_path():
import os, sys import os, sys
@ -10,10 +12,16 @@ def validate_path():
root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..') root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
os.chdir(root_dir_assume) os.chdir(root_dir_assume)
sys.path.append(root_dir_assume) sys.path.append(root_dir_assume)
validate_path() # validate path so you can run from base directory validate_path() # validate path so you can run from base directory
# ==============================================================================================================================
from colorful import * from colorful import *
from toolbox import get_conf, ChatBotWithCookies from toolbox import get_conf, ChatBotWithCookies
import contextlib
import os
import sys
from functools import wraps
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY') get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY')
@ -30,7 +38,43 @@ history = []
system_prompt = "Serve me as a writing and programming assistant." system_prompt = "Serve me as a writing and programming assistant."
web_port = 1024 web_port = 1024
# ==============================================================================================================================
def silence_stdout(func):
@wraps(func)
def wrapper(*args, **kwargs):
_original_stdout = sys.stdout
sys.stdout = open(os.devnull, 'w')
for q in func(*args, **kwargs):
sys.stdout = _original_stdout
yield q
sys.stdout = open(os.devnull, 'w')
sys.stdout.close()
sys.stdout = _original_stdout
return wrapper
class CLI_Printer():
def __init__(self) -> None:
self.pre_buf = ""
def print(self, buf):
bufp = ""
for index, chat in enumerate(buf):
a, b = chat
bufp += sprint亮靛('[Me]:' + a) + '\n'
bufp += '[GPT]:' + b
if index < len(buf)-1:
bufp += '\n'
if self.pre_buf!="" and bufp.startswith(self.pre_buf):
print(bufp[len(self.pre_buf):], end='')
else:
print('\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'+bufp, end='')
self.pre_buf = bufp
return
cli_printer = CLI_Printer()
# ==============================================================================================================================
def test_解析一个Python项目(): def test_解析一个Python项目():
from crazy_functions.解析项目源代码 import 解析一个Python项目 from crazy_functions.解析项目源代码 import 解析一个Python项目
txt = "crazy_functions/test_project/python/dqn" txt = "crazy_functions/test_project/python/dqn"
@ -116,6 +160,52 @@ def test_Markdown多语言():
for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): for cookies, cb, hist, msg in Markdown翻译指定语言(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb) print(cb)
def test_Langchain知识库():
from crazy_functions.Langchain知识库 import 知识库问答
txt = "./"
chatbot = ChatBotWithCookies(llm_kwargs)
for cookies, cb, hist, msg in silence_stdout(知识库问答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
chatbot = ChatBotWithCookies(cookies)
from crazy_functions.Langchain知识库 import 读取知识库作答
txt = "What is the installation method"
for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
def test_Langchain知识库读取():
from crazy_functions.Langchain知识库 import 读取知识库作答
txt = "远程云服务器部署?"
for cookies, cb, hist, msg in silence_stdout(读取知识库作答)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
def test_Latex():
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比, Latex翻译中文并重新编译PDF
txt = "C:/Users/fuqingxu/Desktop/proofread"
txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/paperx"
txt = "C:/Users/fuqingxu/Desktop/旧文件/gpt/papery"
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-14-57-06"
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-15-40-20"
txt = r"https://arxiv.org/abs/1902.03185"
txt = r"C:\Users\fuqingxu\Desktop\旧文件\gpt\latex2pdf\2023-06-03-17-14-40"
txt = r"https://arxiv.org/abs/2305.18290"
txt = r"https://arxiv.org/abs/2305.17608"
# txt = r"https://arxiv.org/abs/2306.00324"
txt = r"https://arxiv.org/abs/2211.16068"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
# txt = "2302.02948.tar"
# print(txt)
# main_tex, work_folder = Latex预处理(txt)
# print('main tex:', main_tex)
# res = 编译Latex(main_tex, work_folder)
# # for cookies, cb, hist, msg in silence_stdout(编译Latex)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
# cli_printer.print(cb) # print(cb)
# test_解析一个Python项目() # test_解析一个Python项目()
@ -129,7 +219,8 @@ def test_Markdown多语言():
# test_联网回答问题() # test_联网回答问题()
# test_解析ipynb文件() # test_解析ipynb文件()
# test_数学动画生成manim() # test_数学动画生成manim()
test_Markdown多语言() # test_Langchain知识库()
# test_Langchain知识库读取()
test_Latex()
input("程序完成,回车退出。") input("程序完成,回车退出。")
print("退出。") print("退出。")

View File

@ -1,4 +1,5 @@
from toolbox import update_ui, get_conf, trimmed_format_exc from toolbox import update_ui, get_conf, trimmed_format_exc
import threading
def input_clipping(inputs, history, max_token_limit): def input_clipping(inputs, history, max_token_limit):
import numpy as np import numpy as np
@ -606,3 +607,94 @@ def get_files_from_everything(txt, type): # type='.md'
success = False success = False
return success, file_manifest, project_folder return success, file_manifest, project_folder
def Singleton(cls):
_instance = {}
def _singleton(*args, **kargs):
if cls not in _instance:
_instance[cls] = cls(*args, **kargs)
return _instance[cls]
return _singleton
@Singleton
class knowledge_archive_interface():
def __init__(self) -> None:
self.threadLock = threading.Lock()
self.current_id = ""
self.kai_path = None
self.qa_handle = None
self.text2vec_large_chinese = None
def get_chinese_text2vec(self):
if self.text2vec_large_chinese is None:
# < -------------------预热文本向量化模组--------------- >
from toolbox import ProxyNetworkActivate
print('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate(): # 临时地激活代理网络
self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
return self.text2vec_large_chinese
def feed_archive(self, file_manifest, id="default"):
self.threadLock.acquire()
# import uuid
self.current_id = id
from zh_langchain import construct_vector_store
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=file_manifest,
sentence_size=100,
history=[],
one_conent="",
one_content_segmentation="",
text2vec = self.get_chinese_text2vec(),
)
self.threadLock.release()
def get_current_archive_id(self):
return self.current_id
def get_loaded_file(self):
return self.qa_handle.get_loaded_file()
def answer_with_archive_by_id(self, txt, id):
self.threadLock.acquire()
if not self.current_id == id:
self.current_id = id
from zh_langchain import construct_vector_store
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=[],
sentence_size=100,
history=[],
one_conent="",
one_content_segmentation="",
text2vec = self.get_chinese_text2vec(),
)
VECTOR_SEARCH_SCORE_THRESHOLD = 0
VECTOR_SEARCH_TOP_K = 4
CHUNK_SIZE = 512
resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
query = txt,
vs_path = self.kai_path,
score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
vector_search_top_k=VECTOR_SEARCH_TOP_K,
chunk_conent=True,
chunk_size=CHUNK_SIZE,
text2vec = self.get_chinese_text2vec(),
)
self.threadLock.release()
return resp, prompt
def try_install_deps(deps):
for dep in deps:
import subprocess, sys
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])

View File

@ -0,0 +1,606 @@
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
import os, shutil
import re
pj = os.path.join
def 寻找Latex主文件(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个。
P.S. 但愿没人把latex模板放在里面传进来
"""
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
if r'\documentclass' in file_content:
return texf
else:
continue
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
def merge_tex_files_(project_foler, main_file, mode):
"""
递归地把多Tex工程整合为一个Tex文档
"""
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
if os.path.exists(fp):
# e.g., \input{srcs/07_appendix.tex}
with open(fp, 'r', encoding='utf-8', errors='replace') as fx:
c = fx.read()
else:
# e.g., \input{srcs/07_appendix}
with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
c = fx.read()
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
递归地把多Tex工程整合为一个Tex文档递归外层
P.S. 顺便把CTEX塞进去以支持中文
P.S. 顺便把Latex的注释去除
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
if mode == 'translate_zh':
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
position = match.end()
main_file = main_file[:position] + '\\usepackage{CTEX}\n\\usepackage{url}\n' + main_file[position:]
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# 删除整行的空注释
if l.startswith("%") or (l.startswith(" ") and l.lstrip().startswith("%")):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
class LinkedListNode():
"""
链表单元
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
def mod_inbraket(match):
"""
为啥chatgpt会把cite里面的逗号换成中文逗号呀 艹
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace('', ':') # 前面是中文冒号,后面是英文冒号
str_to_modify = str_to_modify.replace('', ',') # 前面是中文逗号,后面是英文逗号
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = final_tex.replace('%', r'\%')
final_tex = final_tex.replace(r'\%', r'\\%')
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if node_string.count('{') != node_string.count('}'):
if final_tex.count('{') != node_string.count('{'):
final_tex = node_string # 出问题了,还原原文
if final_tex.count('}') != node_string.count('}'):
final_tex = node_string # 出问题了,还原原文
return final_tex
class LatexPaperSplit():
"""
将Latex文档分解到一个链表中每个链表节点用preserve的标志位提示它是否应当被GPT处理
"""
def __init__(self) -> None:
"""
root是链表的根节点
"""
self.root = None
def merge_result(self, arr, mode, msg):
"""
将GPT处理后的结果融合
"""
result_string = ""
node = self.root
p = 0
while True:
if node.preserve:
result_string += node.string
else:
result_string += fix_content(arr[p], node.string)
p += 1
node = node.next
if node is None: break
if mode == 'translate_zh':
try:
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(result_string)
position = match.end()
result_string = result_string[:position] + \
"{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成其内容可靠性没有任何保障请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" + \
msg + \
"为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" + \
result_string[position:]
except:
pass
return result_string
def split(self, txt, project_folder):
"""
将Latex文档分解到一个链表中每个链表节点用preserve的标志位提示它是否应当被GPT处理
"""
root = LinkedListNode(txt, False)
def split_worker(root, pattern, flags=0):
lt = root
cnt = 0
pattern_compile = re.compile(pattern, flags)
while True:
if not lt.preserve:
while True:
res = pattern_compile.search(lt.string)
if not res: break
before = res.string[:res.span()[0]]
this = res.group(0)
after = res.string[res.span()[1]:]
# ======
lt.string = before
tmp = lt.next
# ======
mid = LinkedListNode(this, True)
lt.next = mid
# ======
aft = LinkedListNode(after, False)
mid.next = aft
aft.next = tmp
# ======
lt = aft
lt = lt.next
cnt += 1
# print(cnt)
if lt is None: break
def split_worker_begin_end(root, pattern, flags=0, limit_n_lines=25):
lt = root
cnt = 0
pattern_compile = re.compile(pattern, flags)
while True:
if not lt.preserve:
while True:
target_string = lt.string
def search_with_line_limit(target_string):
for res in pattern_compile.finditer(target_string):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof', 'em', 'emph', 'textit', 'textbf']
if cmd in white_list or this.count('\n') > 25:
sub_res = search_with_line_limit(this)
if not sub_res: continue
else: return sub_res
else:
return res.group(0)
return False
# ======
# search for first encounter of \begin \end pair with less than 25 lines in the middle
ps = search_with_line_limit(target_string)
if not ps: break
res = re.search(re.escape(ps), target_string, flags)
if not res: assert False
before = res.string[:res.span()[0]]
this = res.group(0)
after = res.string[res.span()[1]:]
# ======
lt.string = before
tmp = lt.next
# ======
mid = LinkedListNode(this, True)
lt.next = mid
# ======
aft = LinkedListNode(after, False)
mid.next = aft
aft.next = tmp
# ======
lt = aft
lt = lt.next
cnt += 1
# print(cnt)
if lt is None: break
# root 是链表的头
print('正在分解Latex源文件构建链表结构')
# 删除iffalse注释
split_worker(root, r"\\iffalse(.*?)\\fi", re.DOTALL)
# 吸收在25行以内的begin-end组合
split_worker_begin_end(root, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
# 吸收匿名公式
split_worker(root, r"\$\$(.*?)\$\$", re.DOTALL)
# 吸收其他杂项
split_worker(root, r"(.*?)\\maketitle", re.DOTALL)
split_worker(root, r"\\section\{(.*?)\}")
split_worker(root, r"\\section\*\{(.*?)\}")
split_worker(root, r"\\subsection\{(.*?)\}")
split_worker(root, r"\\subsubsection\{(.*?)\}")
split_worker(root, r"\\bibliography\{(.*?)\}")
split_worker(root, r"\\bibliographystyle\{(.*?)\}")
split_worker(root, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
split_worker(root, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
split_worker(root, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
split_worker(root, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
split_worker(root, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
split_worker(root, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
split_worker(root, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
split_worker(root, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
split_worker(root, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
split_worker(root, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
split_worker(root, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
split_worker(root, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
split_worker(root, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
split_worker(root, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
split_worker(root, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
split_worker(root, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
split_worker(root, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
split_worker(root, r"\\item ")
split_worker(root, r"\\label\{(.*?)\}")
split_worker(root, r"\\begin\{(.*?)\}")
split_worker(root, r"\\vspace\{(.*?)\}")
split_worker(root, r"\\hspace\{(.*?)\}")
split_worker(root, r"\\end\{(.*?)\}")
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
node = node.next
if node is None: break
# 修复括号
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack kill')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<50: node.preserve = True
node = node.next
if node is None: break
# 将前后断行符脱离
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# 将分解结果返回 res_to_t
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
res_to_t = []
node = root
while True:
show_html = node.string.replace('\n','<br/>')
if not node.preserve:
res_to_t.append(node.string)
f.write(f'<p style="color:black;">#{show_html}#</p>')
else:
f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next
if node is None: break
self.root = root
self.sp = res_to_t
return self.sp
class LatexPaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
print('Segmentation: done')
def merge_result(self):
self.file_result = ["" for _ in range(len(self.file_paths))]
for r, k in zip(self.sp_file_result, self.sp_file_index):
self.file_result[k] += r
def write_result(self):
manifest = []
for path, res in zip(self.file_paths, self.file_result):
with open(path + '.polish.tex', 'w', encoding='utf8') as f:
manifest.append(path + '.polish.tex')
f.write(res)
return manifest
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
# <-------- 寻找主tex文件 ---------->
maintex = 寻找Latex主文件(file_manifest, mode)
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
time.sleep(5)
# <-------- 读取Latex文件, 将多文件tex工程融合为一个巨型tex ---------->
main_tex_basename = os.path.basename(maintex)
assert main_tex_basename.endswith('.tex')
main_tex_basename_bare = main_tex_basename[:-4]
may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
if os.path.exists(may_exist_bbl):
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
content = f.read()
merged_content = merge_tex_files(project_folder, content, mode)
with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
f.write(merged_content)
# <-------- 精细切分latex文件 ---------->
lps = LatexPaperSplit()
res = lps.split(merged_content, project_folder)
# <-------- 拆分过长的latex片段 ---------->
pfg = LatexPaperFileGroup()
for index, r in enumerate(res):
pfg.file_paths.append('segment-' + str(index))
pfg.file_contents.append(r)
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
# <-------- 根据需要切换prompt ---------->
inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]
if os.path.exists(pj(project_folder,'temp.pkl')):
# <-------- 【仅调试】如果存在调试缓存文件则跳过GPT请求环节 ---------->
pfg = objload(file=pj(project_folder,'temp.pkl'))
else:
# <-------- gpt 多线程请求 ---------->
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # 并行任务数量限制, 最多同时执行5个, 其他的排队等待
scroller_max_len = 40
)
# <-------- 文本碎片重组为完整的tex片段 ---------->
pfg.sp_file_result = []
for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
pfg.sp_file_result.append(gpt_say)
pfg.merge_result()
# <-------- 临时存储用于调试 ---------->
pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl'))
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
final_tex = lps.merge_result(pfg.file_result, mode, msg)
with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
f.write(final_tex)
# <-------- 整理结果, 退出 ---------->
chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 返回 ---------->
return project_folder + f'/merge_{mode}.tex'
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
try:
with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
log = f.read()
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
file_lines = f.readlines()
import re
buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
buggy_lines = [int(l) for l in buggy_lines]
buggy_lines = sorted(buggy_lines)
print("removing lines that has errors", buggy_lines)
file_lines.pop(buggy_lines[0]-1)
with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
f.writelines(file_lines)
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
except:
return False, 0, [0]
def compile_latex_with_timeout(command, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def 编译Latex差别(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
import os, time
current_dir = os.getcwd()
n_fix = 1
max_try = 32
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}如果程序停顿5分钟以上则大概率是卡死在Latex里面了。不幸卡死时请直接去该路径下取回翻译结果或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
while True:
import os
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
# 只有第二步成功,才能继续下面的步骤
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir)
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
# <--------------------->
os.chdir(current_dir)
# <---------- 检查结果 ----------->
results_ = ""
original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
results_ += f"原始PDF编译是否成功: {original_pdf_success};"
results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
yield from update_ui_lastest_msg(f'{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(current_dir)
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf)
return True # 成功啦
else:
if n_fix>=max_try: break
n_fix += 1
can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
tex_name=f'{main_file_modified}.tex',
tex_name_pure=f'{main_file_modified}',
n_fix=n_fix,
work_folder_modified=work_folder_modified,
)
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break
os.chdir(current_dir)
return False # 失败啦

View File

@ -2,11 +2,11 @@
> >
> Durante l'installazione delle dipendenze, selezionare rigorosamente le **versioni specificate** nel file requirements.txt. > Durante l'installazione delle dipendenze, selezionare rigorosamente le **versioni specificate** nel file requirements.txt.
> >
> ` pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` > ` pip install -r requirements.txt`
# <img src="docs/logo.png" width="40" > GPT Ottimizzazione Accademica (GPT Academic) # <img src="logo.png" width="40" > GPT Ottimizzazione Accademica (GPT Academic)
**Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](docs/README_EN.md)[Giapponese|](docs/README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](docs/README_RS.md)[Francese](docs/README_FR.md) tradotta da questo stesso progetto. **Se ti piace questo progetto, ti preghiamo di dargli una stella. Se hai sviluppato scorciatoie accademiche o plugin funzionali più utili, non esitare ad aprire una issue o pull request. Abbiamo anche una README in [Inglese|](README_EN.md)[Giapponese|](README_JP.md)[Coreano|](https://github.com/mldljyh/ko_gpt_academic)[Russo|](README_RS.md)[Francese](README_FR.md) tradotta da questo stesso progetto.
Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`multi_language.py`](multi_language.py) (sperimentale). Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`multi_language.py`](multi_language.py) (sperimentale).
> **Nota** > **Nota**
@ -17,7 +17,9 @@ Per tradurre questo progetto in qualsiasi lingua con GPT, leggere e eseguire [`m
> >
> 3. Questo progetto è compatibile e incoraggia l'utilizzo di grandi modelli di linguaggio di produzione nazionale come chatglm, RWKV, Pangu ecc. Supporta la coesistenza di più api-key e può essere compilato nel file di configurazione come `API_KEY="openai-key1,openai-key2,api2d-key3"`. Per sostituire temporaneamente `API_KEY`, inserire `API_KEY` temporaneo nell'area di input e premere Invio per renderlo effettivo. > 3. Questo progetto è compatibile e incoraggia l'utilizzo di grandi modelli di linguaggio di produzione nazionale come chatglm, RWKV, Pangu ecc. Supporta la coesistenza di più api-key e può essere compilato nel file di configurazione come `API_KEY="openai-key1,openai-key2,api2d-key3"`. Per sostituire temporaneamente `API_KEY`, inserire `API_KEY` temporaneo nell'area di input e premere Invio per renderlo effettivo.
<div align="center">Funzione | Descrizione <div align="center">
Funzione | Descrizione
--- | --- --- | ---
Correzione immediata | Supporta correzione immediata e ricerca degli errori di grammatica del documento con un solo clic Correzione immediata | Supporta correzione immediata e ricerca degli errori di grammatica del documento con un solo clic
Traduzione cinese-inglese immediata | Traduzione cinese-inglese immediata con un solo clic Traduzione cinese-inglese immediata | Traduzione cinese-inglese immediata con un solo clic
@ -41,6 +43,8 @@ Avvia il tema di gradio [scuro](https://github.com/binary-husky/chatgpt_academic
Supporto per maggiori modelli LLM, supporto API2D | Sentirsi serviti simultaneamente da GPT3.5, GPT4, [Tsinghua ChatGLM](https://github.com/THUDM/ChatGLM-6B), [Fudan MOSS](https://github.com/OpenLMLab/MOSS) deve essere una grande sensazione, giusto? Supporto per maggiori modelli LLM, supporto API2D | Sentirsi serviti simultaneamente da GPT3.5, GPT4, [Tsinghua ChatGLM](https://github.com/THUDM/ChatGLM-6B), [Fudan MOSS](https://github.com/OpenLMLab/MOSS) deve essere una grande sensazione, giusto?
Ulteriori modelli LLM supportat,i supporto per l'implementazione di Huggingface | Aggiunta di un'interfaccia Newbing (Nuovo Bing), introdotta la compatibilità con Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs), [LLaMA](https://github.com/facebookresearch/llama), [RWKV](https://github.com/BlinkDL/ChatRWKV) e [PanGu-α](https://openi.org.cn/pangu/) Ulteriori modelli LLM supportat,i supporto per l'implementazione di Huggingface | Aggiunta di un'interfaccia Newbing (Nuovo Bing), introdotta la compatibilità con Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs), [LLaMA](https://github.com/facebookresearch/llama), [RWKV](https://github.com/BlinkDL/ChatRWKV) e [PanGu-α](https://openi.org.cn/pangu/)
Ulteriori dimostrazioni di nuove funzionalità (generazione di immagini, ecc.)... | Vedere la fine di questo documento... Ulteriori dimostrazioni di nuove funzionalità (generazione di immagini, ecc.)... | Vedere la fine di questo documento...
</div>
- Nuova interfaccia (modificare l'opzione LAYOUT in `config.py` per passare dal layout a sinistra e a destra al layout superiore e inferiore) - Nuova interfaccia (modificare l'opzione LAYOUT in `config.py` per passare dal layout a sinistra e a destra al layout superiore e inferiore)
<div align="center"> <div align="center">
@ -202,11 +206,13 @@ ad esempio
2. Plugin di funzione personalizzati 2. Plugin di funzione personalizzati
Scrivi plugin di funzione personalizzati e esegui tutte le attività che desideri o non hai mai pensato di fare. Scrivi plugin di funzione personalizzati e esegui tutte le attività che desideri o non hai mai pensato di fare.
La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni] (https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97). La difficoltà di scrittura e debug dei plugin del nostro progetto è molto bassa. Se si dispone di una certa conoscenza di base di Python, è possibile realizzare la propria funzione del plugin seguendo il nostro modello. Per maggiori dettagli, consultare la [guida al plugin per funzioni](https://github.com/binary-husky/chatgpt_academic/wiki/%E5%87%BD%E6%95%B0%E6%8F%92%E4%BB%B6%E6%8C%87%E5%8D%97).
--- ---
# Ultimo aggiornamento # Ultimo aggiornamento
## Nuove funzionalità dinamiche1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html. ## Nuove funzionalità dinamiche
1. Funzionalità di salvataggio della conversazione. Nell'area dei plugin della funzione, fare clic su "Salva la conversazione corrente" per salvare la conversazione corrente come file html leggibile e ripristinabile, inoltre, nell'area dei plugin della funzione (menu a discesa), fare clic su "Carica la cronologia della conversazione archiviata" per ripristinare la conversazione precedente. Suggerimento: fare clic su "Carica la cronologia della conversazione archiviata" senza specificare il file consente di visualizzare la cache degli archivi html di cronologia, fare clic su "Elimina tutti i record di cronologia delle conversazioni locali" per eliminare tutte le cache degli archivi html.
<div align="center"> <div align="center">
<img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" > <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
</div> </div>
@ -307,4 +313,4 @@ https://github.com/kaixindelele/ChatPaper
# Altro: # Altro:
https://github.com/gradio-app/gradio https://github.com/gradio-app/gradio
https://github.com/fghrsh/live2d_demo https://github.com/fghrsh/live2d_demo
``` ```

View File

@ -17,7 +17,9 @@ GPT를 이용하여 프로젝트를 임의의 언어로 번역하려면 [`multi_
> >
> 3. 이 프로젝트는 국내 언어 모델 chatglm과 RWKV, 판고 등의 시도와 호환 가능합니다. 여러 개의 api-key를 지원하며 설정 파일에 "API_KEY="openai-key1,openai-key2,api2d-key3""와 같이 작성할 수 있습니다. `API_KEY`를 임시로 변경해야하는 경우 입력 영역에 임시 `API_KEY`를 입력 한 후 엔터 키를 누르면 즉시 적용됩니다. > 3. 이 프로젝트는 국내 언어 모델 chatglm과 RWKV, 판고 등의 시도와 호환 가능합니다. 여러 개의 api-key를 지원하며 설정 파일에 "API_KEY="openai-key1,openai-key2,api2d-key3""와 같이 작성할 수 있습니다. `API_KEY`를 임시로 변경해야하는 경우 입력 영역에 임시 `API_KEY`를 입력 한 후 엔터 키를 누르면 즉시 적용됩니다.
<div align="center">기능 | 설명 <div align="center">
기능 | 설명
--- | --- --- | ---
원 키워드 | 원 키워드 및 논문 문법 오류를 찾는 기능 지원 원 키워드 | 원 키워드 및 논문 문법 오류를 찾는 기능 지원
한-영 키워드 | 한-영 키워드 지원 한-영 키워드 | 한-영 키워드 지원
@ -265,4 +267,4 @@ https://github.com/kaixindelele/ChatPaper
# 더 많은 : # 더 많은 :
https://github.com/gradio-app/gradio https://github.com/gradio-app/gradio
https://github.com/fghrsh/live2d_demo https://github.com/fghrsh/live2d_demo
``` ```

View File

@ -2,7 +2,7 @@
> >
> Ao instalar as dependências, por favor, selecione rigorosamente as versões **especificadas** no arquivo requirements.txt. > Ao instalar as dependências, por favor, selecione rigorosamente as versões **especificadas** no arquivo requirements.txt.
> >
> `pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple/` > `pip install -r requirements.txt`
> >
# <img src="logo.png" width="40" > Otimização acadêmica GPT (GPT Academic) # <img src="logo.png" width="40" > Otimização acadêmica GPT (GPT Academic)
@ -18,7 +18,9 @@ Para traduzir este projeto para qualquer idioma com o GPT, leia e execute [`mult
> >
> 3. Este projeto é compatível com e incentiva o uso de modelos de linguagem nacionais, como chatglm e RWKV, Pangolin, etc. Suporta a coexistência de várias chaves de API e pode ser preenchido no arquivo de configuração como `API_KEY="openai-key1,openai-key2,api2d-key3"`. Quando precisar alterar temporariamente o `API_KEY`, basta digitar o `API_KEY` temporário na área de entrada e pressionar Enter para que ele entre em vigor. > 3. Este projeto é compatível com e incentiva o uso de modelos de linguagem nacionais, como chatglm e RWKV, Pangolin, etc. Suporta a coexistência de várias chaves de API e pode ser preenchido no arquivo de configuração como `API_KEY="openai-key1,openai-key2,api2d-key3"`. Quando precisar alterar temporariamente o `API_KEY`, basta digitar o `API_KEY` temporário na área de entrada e pressionar Enter para que ele entre em vigor.
<div align="center">Funcionalidade | Descrição <div align="center">
Funcionalidade | Descrição
--- | --- --- | ---
Um clique de polimento | Suporte a um clique polimento, um clique encontrar erros de gramática no artigo Um clique de polimento | Suporte a um clique polimento, um clique encontrar erros de gramática no artigo
Tradução chinês-inglês de um clique | Tradução chinês-inglês de um clique Tradução chinês-inglês de um clique | Tradução chinês-inglês de um clique
@ -216,7 +218,9 @@ Para mais detalhes, consulte o [Guia do plug-in de função.](https://github.com
--- ---
# Última atualização # Última atualização
## Novas funções dinâmicas.1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html. ## Novas funções dinâmicas.
1. Função de salvamento de diálogo. Ao chamar o plug-in de função "Salvar diálogo atual", é possível salvar o diálogo atual em um arquivo html legível e reversível. Além disso, ao chamar o plug-in de função "Carregar arquivo de histórico de diálogo" no menu suspenso da área de plug-in, é possível restaurar uma conversa anterior. Dica: clicar em "Carregar arquivo de histórico de diálogo" sem especificar um arquivo permite visualizar o cache do arquivo html de histórico. Clicar em "Excluir todo o registro de histórico de diálogo local" permite excluir todo o cache de arquivo html.
<div align="center"> <div align="center">
<img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" > <img src="https://user-images.githubusercontent.com/96192199/235222390-24a9acc0-680f-49f5-bc81-2f3161f1e049.png" width="500" >
</div> </div>
@ -317,4 +321,4 @@ https://github.com/kaixindelele/ChatPaper
# Mais: # Mais:
https://github.com/gradio-app/gradio https://github.com/gradio-app/gradio
https://github.com/fghrsh/live2d_demo https://github.com/fghrsh/live2d_demo
``` ```

View File

@ -2,7 +2,7 @@ import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
def main(): def main():
import gradio as gr import gradio as gr
if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "用 pip install -r requirements.txt 安装依赖" if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖详情信息见requirements.txt"
from request_llm.bridge_all import predict from request_llm.bridge_all import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到

View File

@ -1,6 +1,6 @@
import markdown import markdown
import importlib import importlib
import traceback import time
import inspect import inspect
import re import re
import gradio as gr import gradio as gr
@ -102,6 +102,17 @@ def update_ui(chatbot, history, msg='正常', *args): # 刷新界面
yield chatbot.get_cookies(), chatbot, history, msg yield chatbot.get_cookies(), chatbot, history, msg
pool.submit(func_box.thread_write_chat, chatbot) pool.submit(func_box.thread_write_chat, chatbot)
def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
"""
刷新用户界面
"""
if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
chatbot[-1] = list(chatbot[-1])
chatbot[-1][-1] = lastmsg
yield from update_ui(chatbot=chatbot, history=history)
time.sleep(delay)
def trimmed_format_exc(): def trimmed_format_exc():
import os, traceback import os, traceback
str = traceback.format_exc() str = traceback.format_exc()
@ -115,7 +126,7 @@ def CatchException(f):
""" """
@wraps(f) @wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT): def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
try: try:
yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT) yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
except Exception as e: except Exception as e:
@ -458,6 +469,13 @@ def find_recent_files(directory):
return recent_files return recent_files
def promote_file_to_downloadzone(file, rename_file=None):
# 将文件复制一份到下载区
import shutil
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = os.path.join(f'./gpt_log/', rename_file)
if os.path.exists(new_path): os.remove(new_path)
shutil.copyfile(file, new_path)
def get_user_upload(chatbot, ipaddr: gr.Request): def get_user_upload(chatbot, ipaddr: gr.Request):
@ -807,6 +825,8 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
其他小工具: 其他小工具:
- zip_folder: 把某个路径下所有文件压缩然后转移到指定的另一个路径中gpt写的 - zip_folder: 把某个路径下所有文件压缩然后转移到指定的另一个路径中gpt写的
- gen_time_str: 生成时间戳 - gen_time_str: 生成时间戳
- ProxyNetworkActivate: 临时地启动代理网络(如果有)
- objdump/objload: 快捷的调试函数
======================================================================== ========================================================================
""" """
@ -841,11 +861,15 @@ def zip_folder(source_folder, dest_folder, zip_name):
print(f"Zip file created at {zip_file}") print(f"Zip file created at {zip_file}")
def zip_result(folder):
import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
zip_folder(folder, './gpt_log/', f'{t}-result.zip')
def gen_time_str(): def gen_time_str():
import time import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
class ProxyNetworkActivate(): class ProxyNetworkActivate():
""" """
这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理 这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@ -854,12 +878,27 @@ class ProxyNetworkActivate():
from toolbox import get_conf from toolbox import get_conf
proxies, = get_conf('proxies') proxies, = get_conf('proxies')
if 'no_proxy' in os.environ: os.environ.pop('no_proxy') if 'no_proxy' in os.environ: os.environ.pop('no_proxy')
os.environ['HTTP_PROXY'] = proxies['http'] if proxies is not None:
os.environ['HTTPS_PROXY'] = proxies['https'] if 'http' in proxies: os.environ['HTTP_PROXY'] = proxies['http']
if 'https' in proxies: os.environ['HTTPS_PROXY'] = proxies['https']
return self return self
def __exit__(self, exc_type, exc_value, traceback): def __exit__(self, exc_type, exc_value, traceback):
os.environ['no_proxy'] = '*' os.environ['no_proxy'] = '*'
if 'HTTP_PROXY' in os.environ: os.environ.pop('HTTP_PROXY') if 'HTTP_PROXY' in os.environ: os.environ.pop('HTTP_PROXY')
if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY') if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
return return
def objdump(obj, file='objdump.tmp'):
import pickle
with open(file, 'wb+') as f:
pickle.dump(obj, f)
return
def objload(file='objdump.tmp'):
import pickle, os
if not os.path.exists(file):
return
with open(file, 'rb') as f:
return pickle.load(f)