Compare commits

..

135 Commits

Author SHA1 Message Date
0ee25f475e Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-20 23:07:51 +08:00
1fede6df7f temp 2023-06-20 23:05:17 +08:00
22a65cd163 Create build-with-latex.yml 2023-06-21 00:55:24 +10:00
538b041ea3 Merge pull request #890 from Mcskiller/master
Update README.md
2023-06-21 00:53:26 +10:00
d7b056576d add latex docker-compose 2023-06-21 00:52:58 +10:00
cb0bb6ab4a fix minor bugs 2023-06-21 00:41:33 +10:00
bf955aaf12 fix bugs 2023-06-20 23:12:30 +10:00
61eb0da861 fix encoding bug 2023-06-20 22:08:09 +10:00
5da633d94d Update README.md
Fix the error URL for the git clone.
2023-06-20 19:10:11 +08:00
af7734dd35 avoid file fusion 2023-06-19 16:57:11 +10:00
951d5ec758 Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-19 14:52:25 +10:00
a06e43c96b Update README.md 2023-06-18 16:15:37 +08:00
29c6bfb6cb Update README.md 2023-06-18 16:12:06 +08:00
8d7ee975a0 Update README.md 2023-06-18 16:10:45 +08:00
4bafbb3562 Update Latex输出PDF结果.py 2023-06-18 15:54:23 +08:00
2bb13b4677 Update README.md 2023-06-18 15:44:42 +08:00
cbcb98ef6a Merge pull request #872 from Skyzayre/master
Update README.md
2023-06-16 17:54:39 +08:00
bb864c6313 增加一些提示文字 2023-06-16 17:33:19 +08:00
6d849eeb12 修复Langchain插件的bug 2023-06-16 17:33:03 +08:00
ef752838b0 Update README.md 2023-06-15 02:07:43 +08:00
73d4a1ff4b Update README.md 2023-06-14 10:15:47 +08:00
8c62f21aa6 3.41增加gpt-3.5-16k的支持 2023-06-14 09:57:09 +08:00
c40ebfc21f 将gpt-3.5-16k作为加入支持列表 2023-06-14 09:50:15 +08:00
c365ea9f57 Update README.md 2023-06-13 16:13:19 +08:00
12d66777cc Merge pull request #864 from OverKit/master
check letter % after removing spaces or tabs in the left
2023-06-12 15:21:35 +08:00
9ac3d0d65d check letter % after removing spaces or tabs in the left 2023-06-12 10:09:52 +08:00
9fd212652e 专业词汇声明 2023-06-12 09:45:59 +08:00
790a1cf12a 添加一些提示 2023-06-11 20:12:25 +08:00
3ecf2977a8 修复caption翻译 2023-06-11 18:23:54 +08:00
aeddf6b461 Update Latex输出PDF结果.py 2023-06-11 10:20:49 +08:00
ce0d8b9dab 虚空终端插件雏形 2023-06-11 01:36:23 +08:00
3c00e7a143 file link in chatbot 2023-06-10 21:45:38 +08:00
ef1bfdd60f update pip install notice 2023-06-08 21:29:10 +08:00
e48d92e82e update translation 2023-06-08 18:34:06 +08:00
110510997f Update README.md 2023-06-08 12:48:52 +08:00
b52695845e Update README.md 2023-06-08 12:44:05 +08:00
f30c9c6d3b Update README.md 2023-06-08 12:43:13 +08:00
ff5403eac6 Update README.md 2023-06-08 12:42:24 +08:00
f9226d92be Update version 2023-06-08 12:24:14 +08:00
a0ea5d0e9e Update README.md 2023-06-08 12:22:03 +08:00
ce6f11d200 Update README.md 2023-06-08 12:20:49 +08:00
10b3001dba Update README.md 2023-06-08 12:19:11 +08:00
e2de1d76ea Update README.md 2023-06-08 12:18:31 +08:00
77cc141a82 Update README.md 2023-06-08 12:14:02 +08:00
526b4d8ecd Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-07 11:09:20 +08:00
149db621ec langchain check depends 2023-06-07 11:09:12 +08:00
2e1bb7311c Merge pull request #848 from MengDanzz/master
将Dockerfile COPY分成两段,缓存依赖库,重新构建不需要重新安装
2023-06-07 10:44:09 +08:00
dae65fd2c2 在copy ..后在运行一次pip install检查依赖变化 2023-06-07 10:43:45 +08:00
9aafb2ee47 非pypi包加入COPY 2023-06-07 09:18:57 +08:00
6bc91bd02e Merge branch 'binary-husky:master' into master 2023-06-07 09:15:44 +08:00
8ef7344101 fix subprocess bug in Windows 2023-06-06 18:57:52 +08:00
40da1b0afe 将Latex分解程序放到子进程执行 2023-06-06 18:44:00 +08:00
c65def90f3 将Dockerfile COPY分成两段,缓存依赖库,重新构建不需要重新安装 2023-06-06 14:36:30 +08:00
ddeaf76422 check latex in PATH 2023-06-06 00:23:00 +08:00
f23b66dec2 update Dockerfile with Latex 2023-06-05 23:49:54 +08:00
a26b294817 Write Some Docstring 2023-06-05 23:44:59 +08:00
66018840da declare resp 2023-06-05 23:24:41 +08:00
cea2144f34 fix test samples 2023-06-05 23:11:21 +08:00
7f5be93c1d 修正一些正则匹配bug 2023-06-05 22:57:39 +08:00
85b838b302 add Linux support 2023-06-04 23:06:35 +08:00
27f97ba92a remove previous results 2023-06-04 16:55:36 +08:00
14269eba98 建立本地arxiv缓存区 2023-06-04 16:08:01 +08:00
d5c9bc9f0a 提高iffalse搜索优先级 2023-06-04 14:15:59 +08:00
b0fed3edfc consider iffalse state 2023-06-04 14:06:02 +08:00
7296d054a2 patch latex segmentation 2023-06-04 13:56:15 +08:00
d57c7d352d improve quality 2023-06-03 23:54:30 +08:00
3fd2927ea3 改善 2023-06-03 23:33:45 +08:00
b745074160 avoid most compile failure 2023-06-03 23:33:32 +08:00
70ee810133 improve success rate 2023-06-03 19:39:19 +08:00
68fea9e79b fix test 2023-06-03 18:09:39 +08:00
f82bf91aa8 test example 2023-06-03 18:06:39 +08:00
dde9edcc0c fix a fatal mistake 2023-06-03 17:49:22 +08:00
66c78e459e 修正提示 2023-06-03 17:18:38 +08:00
de54102303 修改提醒 2023-06-03 16:43:26 +08:00
7c7d2d8a84 Latex的minipage补丁 2023-06-03 16:16:32 +08:00
834f989ed4 考虑有人用input不加.tex的情况 2023-06-03 15:42:22 +08:00
b658ee6e04 修复arxiv翻译的一些问题 2023-06-03 15:36:55 +08:00
1a60280ea0 添加警告 2023-06-03 14:40:37 +08:00
991cb7d272 warning 2023-06-03 14:39:40 +08:00
463991cfb2 fix bug 2023-06-03 14:24:06 +08:00
06f10b5fdc fix zh cite bug 2023-06-03 14:17:58 +08:00
d275d012c6 Merge branch 'langchain' into master 2023-06-03 13:53:39 +08:00
c5d1ea3e21 update langchain version 2023-06-03 13:53:34 +08:00
0022b92404 update prompt 2023-06-03 13:50:39 +08:00
ef61221241 latex auto translation milestone 2023-06-03 13:46:40 +08:00
5a1831db98 成功! 2023-06-03 00:34:23 +08:00
a643f8b0db debug translation 2023-06-02 23:06:01 +08:00
601712fd0a latex toolchain 2023-06-02 21:44:11 +08:00
e769f831c7 latex 2023-06-02 14:07:04 +08:00
dcd952671f Update main.py 2023-06-01 15:56:52 +08:00
06564df038 Merge branch 'langchain' 2023-06-01 09:39:34 +08:00
2f037f30d5 暂时移除插件锁定 2023-06-01 09:39:00 +08:00
efedab186d Merge branch 'master' into langchain 2023-06-01 00:10:22 +08:00
f49cae5116 Update Langchain知识库.py 2023-06-01 00:09:07 +08:00
2b620ccf2e 更新提示 2023-06-01 00:07:19 +08:00
a1b7a4da56 更新测试案例 2023-06-01 00:03:27 +08:00
61b0e49fed fix some bugs in linux 2023-05-31 23:49:25 +08:00
f60dc371db 12 2023-05-31 10:42:44 +08:00
0a3433b8ac Update README.md 2023-05-31 10:37:08 +08:00
31bce54abb Update README.md 2023-05-31 10:34:21 +08:00
5db1530717 Merge branch 'langchain' of github.com:binary-husky/chatgpt_academic into langchain 2023-05-30 20:08:47 +08:00
c32929fd11 Merge branch 'master' into langchain 2023-05-30 20:08:15 +08:00
3e4c2b056c knowledge base 2023-05-30 19:55:38 +08:00
e79e9d7d23 Merge branch 'master' into langchain 2023-05-30 18:31:39 +08:00
d175b93072 Update README.md.Italian.md 2023-05-30 17:27:41 +08:00
ed254687d2 Update README.md.Italian.md 2023-05-30 17:26:12 +08:00
c0392f7074 Update README.md.Korean.md 2023-05-30 17:25:32 +08:00
f437712af7 Update README.md.Portuguese.md 2023-05-30 17:22:46 +08:00
6d1ea643e9 langchain 2023-05-30 12:54:42 +08:00
9e84cfcd46 Update README.md 2023-05-29 19:48:34 +08:00
897695d29f 修复二级路径的文件屏蔽 2023-05-28 20:25:35 +08:00
1dcc2873d2 修复Gradio配置泄露的问题 2023-05-28 20:23:47 +08:00
42cf738a31 修复一些情况下复制键失效的问题 2023-05-28 18:12:48 +08:00
e4646789af Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-05-28 16:07:29 +08:00
e6c3aabd45 docker-compose check 2023-05-28 16:07:24 +08:00
6789d1fab4 Update README.md 2023-05-28 11:21:50 +08:00
7a733f00a2 Update README.md 2023-05-28 00:19:23 +08:00
dd55888f0e Update README.md 2023-05-28 00:16:45 +08:00
0327df22eb Update README.md 2023-05-28 00:14:54 +08:00
e544f5e9d0 Update README.md 2023-05-27 23:45:15 +08:00
0fad4f44a4 fix dockerfile 2023-05-27 23:36:42 +08:00
1240dd6f26 local gradio 2023-05-27 23:29:22 +08:00
d6be947177 修复gradio的依赖安装问题 2023-05-27 23:10:44 +08:00
3cfbdce9f2 remove limitation for now 2023-05-27 22:25:50 +08:00
1ee471ff57 fix reminder 2023-05-27 22:20:46 +08:00
25ccecf8e3 Update README.md 2023-05-27 21:56:43 +08:00
9e991bfa3e Update requirements.txt 2023-05-27 21:56:16 +08:00
221efd0193 Update README.md 2023-05-27 21:11:25 +08:00
976b9bf65f Update README.md 2023-05-27 21:04:52 +08:00
ae5783e383 修复gradio复制按钮BUG 2023-05-27 20:20:45 +08:00
30224af042 Merge pull request #798 from Bit0r/master
🐛 匹配latex注释的正则表达式
2023-05-27 14:03:07 +08:00
8ff7c15cd8 🐛 匹配latex注释的正则表达式 2023-05-27 11:19:48 +08:00
ec8cc48a4d Add ProxyNetworkActivate 2023-05-25 23:48:18 +08:00
5d75c578b9 fix dependency 2023-05-25 15:28:27 +08:00
cd411c2eea newbing-free deps 2023-05-25 15:12:54 +08:00
21 changed files with 148 additions and 831 deletions

View File

@ -1,15 +1,3 @@
---
title: ChatImprovement
emoji: 😻
colorFrom: blue
colorTo: blue
sdk: gradio
sdk_version: 3.32.0
app_file: app.py
pinned: false
---
# ChatGPT 学术优化
> **Note**
>
> 2023.5.27 对Gradio依赖进行了调整Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时请严格选择`requirements.txt`中**指定的版本**
@ -109,7 +97,7 @@ cd gpt_academic
2. 配置API_KEY
在`config.py`中配置API KEY等设置[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。
在`config.py`中配置API KEY等设置[特殊网络环境设置](https://github.com/binary-husky/gpt_academic/issues/1) 。
(P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。因此,如果您能理解我们的配置读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中。`config_private.py`不受git管控可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项,环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`)
@ -152,9 +140,15 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-
python main.py
```
5. 测试函数插件
```
- 测试函数插件模板函数要求gpt回答历史上的今天发生了什么您可以根据此函数为模板实现更复杂的功能
点击 "[函数插件模板Demo] 历史上的今天"
```
## 安装-方法2使用Docker
1. 仅ChatGPT推荐大多数人选择等价于docker-compose方案1
1. 仅ChatGPT推荐大多数人选择
``` sh
git clone https://github.com/binary-husky/gpt_academic.git # 下载项目
@ -167,43 +161,41 @@ docker run --rm -it --net=host gpt-academic
#(最后一步-选择2在macOS/windows环境下只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
```
P.S. 如果需要依赖Latex的插件功能请见Wiki。另外您也可以直接使用docker-compose获取Latex功能修改docker-compose.yml保留方案4并删除其他方案
P.S. 如果需要依赖Latex的插件功能请见Wiki
2. ChatGPT + ChatGLM + MOSS需要熟悉Docker
``` sh
# 修改docker-compose.yml保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置参考其中注释即可
# 修改docker-compose.yml删除方案1和方案3保留方案2。修改docker-compose.yml中方案2的配置参考其中注释即可
docker-compose up
```
3. ChatGPT + LLAMA + 盘古 + RWKV需要熟悉Docker
``` sh
# 修改docker-compose.yml保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置参考其中注释即可
# 修改docker-compose.yml删除方案1和方案2保留方案3。修改docker-compose.yml中方案3的配置参考其中注释即可
docker-compose up
```
## 安装-方法3其他部署姿势
1. 一键运行脚本。
完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本
完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本
不建议电脑上已有python的用户采用此方法在此基础上安装插件的依赖很麻烦
脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。
2. 使用docker-compose运行。
请阅读docker-compose.yml后按照其中的提示操作即可
3. 如何使用反代URL
3. 如何使用反代URL/微软云AzureAPI。
按照`config.py`中的说明配置API_URL_REDIRECT即可。
4. 微软云AzureAPI
按照`config.py`中的说明配置即可AZURE_ENDPOINT等四个配置
5. 远程云服务器部署(需要云服务器知识与经验)。
4. 远程云服务器部署(需要云服务器知识与经验)。
请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
6. 使用WSL2Windows Subsystem for Linux 子系统)。
5. 使用WSL2Windows Subsystem for Linux 子系统)。
请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
7. 如何在二级网址(如`http://localhost/subpath`)下运行。
6. 如何在二级网址(如`http://localhost/subpath`)下运行。
请访问[FastAPI运行说明](docs/WithFastapi.md)
---

View File

@ -1,7 +1,6 @@
# [step 1]>> 例如: API_KEY = "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" 此key无效
API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY用英文逗号分割例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2"
# [step 2]>> 改为True应用代理如果直接在海外服务器部署此处不修改
USE_PROXY = False
if USE_PROXY:
@ -45,9 +44,10 @@ WEB_PORT = -1
# 如果OpenAI不响应网络卡顿、代理失败、KEY失效重试的次数限制
MAX_RETRY = 2
# OpenAI模型选择是gpt4现在只对申请成功的人开放
LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
# P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
@ -81,10 +81,3 @@ your bing cookies here
# 如果需要使用Slack Claude使用教程详情见 request_llm/README.md
SLACK_CLAUDE_BOT_ID = ''
SLACK_CLAUDE_USER_TOKEN = ''
# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
AZURE_API_KEY = "填入azure openai api的密钥"
AZURE_API_VERSION = "填入api版本"
AZURE_ENGINE = "填入ENGINE"

View File

@ -226,20 +226,12 @@ def get_crazy_functions():
try:
from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({
"连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
"连接网络回答问题(输入问题,再点击按钮,需要访问谷歌)": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接网络回答问题)
}
})
from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
function_plugins.update({
"连接网络回答问题中文Bing版输入问题后点击该插件": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接bing搜索回答问题)
}
})
except:
print('Load function plugin failed')
@ -356,28 +348,17 @@ def get_crazy_functions():
try:
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
function_plugins.update({
"Latex英文纠错+高亮修正位置 [需Latex]": {
"[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(Latex英文纠错加PDF对比)
}
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
"Arixv翻译输入arxivID[需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
function_plugins.update({
"本地论文翻译上传Latex压缩包[需Latex]": {
"Arixv翻译输入arxivID [需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
@ -387,6 +368,17 @@ def get_crazy_functions():
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
# function_plugins.update({
# "本地论文翻译上传Latex压缩包 [需Latex]": {
# "Color": "stop",
# "AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder":
# "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
# "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
# "Function": HotReload(Latex翻译中文并重新编译PDF)
# }
# })
except:
print('Load function plugin failed')

View File

@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement):
- sys_prompt_array: A list of strings containing prompts for system prompts.
"""
n_split = len(pfg.sp_file_contents)
if mode == 'proofread_en':
if mode == 'proofread':
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@ -70,12 +70,6 @@ def move_project(project_folder, arxiv_id=None):
shutil.rmtree(new_workfolder)
except:
pass
# align subfolder if there is a folder wrapper
items = glob.glob(pj(project_folder,'*'))
if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
if os.path.isdir(items[0]): project_folder = items[0]
shutil.copytree(src=project_folder, dst=new_workfolder)
return new_workfolder
@ -147,11 +141,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
chatbot.append([ "函数插件功能?",
"对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4其他模型转化效果未知。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- more requirements ------------->
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
more_req = plugin_kwargs.get("advanced_arg", "")
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
@ -190,13 +180,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
if not os.path.exists(project_folder + '/merge_proofread.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)
chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->
success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
@ -205,7 +195,6 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@ -289,7 +278,6 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面

View File

@ -188,15 +188,7 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2305.17608"
# txt = r"https://arxiv.org/abs/2211.16068" # ACE
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
# txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831"
# txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559"
# txt = r"https://arxiv.org/abs/2303.08774"
txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"https://arxiv.org/abs/2002.09253"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@ -225,7 +217,6 @@ def test_Latex():
# test_数学动画生成manim()
# test_Langchain知识库()
# test_Langchain知识库读取()
if __name__ == "__main__":
test_Latex()
input("程序完成,回车退出。")
print("退出。")
test_Latex()
input("程序完成,回车退出。")
print("退出。")

View File

@ -8,49 +8,24 @@ pj = os.path.join
"""
========================================================================
Part One
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
Latex segmentation to a linklist
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1
def set_forbidden_text(text, mask, pattern, flags=0):
def split_worker(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
def split_worker_careful_brace(text, mask, pattern, flags=0):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
Move area into preserve area
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
@ -65,12 +40,9 @@ def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
Move area out of preserve area
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
@ -83,12 +55,9 @@ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wr
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
Add it to preserve area
@ -141,41 +110,19 @@ Latex Merge File
def 寻找Latex主文件(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个。
P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
P.S. 但愿没人把latex模板放在里面传进来
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
return texf
else:
continue
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2 通过一些Latex模板中常见但通常不会出现在正文的单词对不同latex源文件扣分取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # 取评分最高者返回
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
@ -185,7 +132,6 @@ def rm_comments(main_file):
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
@ -232,11 +178,9 @@ def merge_tex_files(project_foler, main_file, mode):
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find paper abstract section!"
return main_file
@ -268,8 +212,6 @@ def fix_content(final_tex, node_string):
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
@ -317,34 +259,45 @@ def split_subprocess(txt, project_folder, return_dict, opts):
mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
# 吸收title与作者以上的部分
text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
# 吸收iffalse注释
text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
# 吸收在42行以内的begin-end组合
text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
text, mask = split_worker(text, mask, r"(.*?)\\maketitle", re.DOTALL)
# 删除iffalse注释
text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
# 吸收在25行以内的begin-end组合
text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
# 吸收匿名公式
text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
# 吸收其他杂项
text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
# reverse 操作必须放在最后
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\item ")
text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
text, mask = split_worker_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
root = convert_to_linklist(text, mask)
# 修复括号
@ -418,7 +371,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
prev_node = node
node = node.next
if node is None: break
# 输出html调试文件用红色标注处保留区PRESERVE用黑色标注转换区TRANSFORM
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
segment_parts_for_gpt = []
nodes = []
@ -449,7 +402,7 @@ class LatexPaperSplit():
"""
def __init__(self) -> None:
self.nodes = None
self.msg = "*{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
self.msg = "{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
@ -470,14 +423,8 @@ class LatexPaperSplit():
if mode == 'translate_zh':
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(result_string)
if not match:
# match \abstract{xxxx}
pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match = pattern_compile.search(result_string)
position = match.regs[1][0]
else:
# match \begin{abstract}xxxx\end{abstract}
position = match.end()
assert match is not None, "Cannot find paper abstract section!"
position = match.end()
result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
return result_string
@ -496,7 +443,6 @@ class LatexPaperSplit():
args=(txt, project_folder, return_dict, opts))
p.start()
p.join()
p.close()
self.nodes = return_dict['nodes']
self.sp = return_dict['segment_parts_for_gpt']
return self.sp
@ -551,11 +497,11 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
def write_html(sp_file_contents, sp_file_result, chatbot):
# write html
try:
import shutil
import copy
from .crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
@ -573,7 +519,6 @@ def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
@ -654,7 +599,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl'))
write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
@ -691,9 +636,10 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
def compile_latex_with_timeout(command, cwd, timeout=60):
def compile_latex_with_timeout(command, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
@ -717,24 +663,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
# 只有第二步成功,才能继续下面的步骤
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir)
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
if mode!='translate_zh':
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@ -742,11 +688,13 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
# <--------------------->
os.chdir(current_dir)
# <---------- 检查结果 ----------->
results_ = ""
@ -758,15 +706,13 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
yield from update_ui_lastest_msg(f'{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
if diff_pdf_success:
result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
os.chdir(current_dir)
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
return True # 成功啦
else:
if n_fix>=max_try: break
@ -782,6 +728,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break
os.chdir(current_dir)
return False # 失败啦

View File

@ -13,9 +13,7 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
# 的长度必须小于 2500 个 Token
file_content, page_one = read_and_clean_pdf_text(file_name) # 尝试按照章节切割PDF
file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
TOKEN_LIMIT_PER_FRAGMENT = 2500
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf

View File

@ -1,102 +0,0 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llm.bridge_all import model_info
def bing_search(query, proxies=None):
query = query
url = f"https://cn.bing.com/search?q={query}"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
response = requests.get(url, headers=headers, proxies=proxies)
soup = BeautifulSoup(response.content, 'html.parser')
results = []
for g in soup.find_all('li', class_='b_algo'):
anchors = g.find_all('a')
if anchors:
link = anchors[0]['href']
if not link.startswith('http'):
continue
title = g.find('h2').text
item = {'title': title, 'link': link}
results.append(item)
for r in results:
print(r['link'])
return results
def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者它可以作为创建新功能函数的模板。您若希望分享新的功能模组请不吝PR"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies, = get_conf('proxies')
urls = bing_search(txt, proxies)
history = []
# ------------- < 第2步依次访问网页 > -------------
max_search_result = 8 # 最多收纳多少个网页的结果
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
history.extend([f"{index}份搜索结果:", res])
chatbot.append([f"{index}份搜索结果:", res[:500]+"......"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第3步ChatGPT综合 > -------------
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@ -13,11 +13,11 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((txt, "正在同时咨询gpt-3.5和gpt-4……"))
chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = 'gpt-3.5-turbo&gpt-4' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,

View File

@ -104,7 +104,7 @@ def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
meta_paper_info_list = meta_paper_info_list[batchsize:]
chatbot.append(["状态?",
"已经全部完成您可以试试让AI写一个Related Works例如您可以继续输入Write an academic \"Related Works\" section about \"你搜索的研究领域\" for me."])
"已经全部完成您可以试试让AI写一个Related Works例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
msg = '正常'
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)

View File

@ -1,7 +1,6 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime, re
import datetime
@CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
@ -19,34 +18,12 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
for i in range(5):
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
i_say = f'历史中哪些事件发生在{currentMonth}{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}'
i_say = f'历史中哪些事件发生在{currentMonth}{currentDay}日?列举两条并发送相关图片。发送图片时,使用Markdown将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt='输出格式示例1908年美国消防救援事业发展的“美国消防协会”成立。关键词{"KeyWords":["Fire","American"]}。'
sys_prompt="当你想发送一张照片时请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
)
gpt_say = get_images(gpt_say)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
def get_images(gpt_say):
def get_image_by_keyword(keyword):
import requests
from bs4 import BeautifulSoup
response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
if "data-src" in image_element: break
return image_element["data-src"]
for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say):
keywords = [n.strip('"') for n in keywords.split(',')]
try:
description = keywords[0]
url = get_image_by_keyword(keywords[0])
img_tag = f"\n\n![{description}]({url})"
gpt_say += img_tag
except:
continue
return gpt_say

View File

@ -103,30 +103,3 @@ services:
echo '[jittorllms] 正在从github拉取最新代码...' &&
git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force &&
python3 -u main.py"
## ===================================================
## 【方案四】 chatgpt + Latex
## ===================================================
version: '3'
services:
gpt_academic_with_latex:
image: ghcr.io/binary-husky/gpt_academic_with_latex:master
environment:
# 请查阅 `config.py` 以查看所有的配置信息
API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
USE_PROXY: ' True '
proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } '
LLM_MODEL: ' gpt-3.5-turbo '
AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "gpt-4"] '
LOCAL_MODEL_DEVICE: ' cuda '
DEFAULT_WORKER_NUM: ' 10 '
WEB_PORT: ' 12303 '
# 与宿主的网络融合
network_mode: "host"
# 不使用代理网络拉取最新代码
command: >
bash -c "python3 -u main.py"

Binary file not shown.

View File

@ -1,152 +0,0 @@
# 通过微软Azure云服务申请 Openai API
由于Openai和微软的关系现在是可以通过微软的Azure云计算服务直接访问openai的api免去了注册和网络的问题。
快速入门的官方文档的链接是:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
# 申请API
按文档中的“先决条件”的介绍,出了编程的环境以外,还需要以下三个条件:
1.  Azure账号并创建订阅
2.  为订阅添加Azure OpenAI 服务
3.  部署模型
## Azure账号并创建订阅
### Azure账号
创建Azure的账号时最好是有微软的账号这样似乎更容易获得免费额度第一个月的200美元实测了一下如果用一个刚注册的微软账号登录Azure的话并没有这一个月的免费额度
创建Azure账号的网址是[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/)
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_944786_iH6AECuZ_tY0EaBd_1685327219?w=1327\&h=695\&type=image/png)
打开网页后,点击 “免费开始使用” 会跳转到登录或注册页面,如果有微软的账户,直接登录即可,如果没有微软账户,那就需要到微软的网页再另行注册一个。
注意Azure的页面和政策时不时会变化已实际最新显示的为准就好。
### 创建订阅
注册好Azure后便可进入主页
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_444847_tk-9S-pxOYuaLs_K_1685327675?w=1865\&h=969\&type=image/png)
首先需要在订阅里进行添加操作,点开后即可进入订阅的页面:
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_612820_z_1AlaEgnJR-rUl0_1685327892?w=1865\&h=969\&type=image/png)
第一次进来应该是空的点添加即可创建新的订阅可以是“免费”或者“即付即用”的订阅其中订阅ID是后面申请Azure OpenAI需要使用的。
## 为订阅添加Azure OpenAI服务
之后回到首页点Azure OpenAI即可进入OpenAI服务的页面如果不显示的话则在首页上方的搜索栏里搜索“openai”即可
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_269759_nExkGcPC0EuAR5cp_1685328130?w=1865\&h=969\&type=image/png)
不过现在这个服务还不能用。在使用前,还需要在这个网址申请一下:
[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu)
这里有二十来个问题,按照要求和自己的实际情况填写即可。
其中需要注意的是
1.  千万记得填对"订阅ID"
2.  需要填一个公司邮箱(可以不是注册用的邮箱)和公司网址
之后,在回到上面那个页面,点创建,就会进入创建页面了:
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_72708_9d9JYhylPVz3dFWL_1685328372?w=824\&h=590\&type=image/png)
需要填入“资源组”和“名称”,按照自己的需要填入即可。
完成后,在主页的“资源”里就可以看到刚才创建的“资源”了,点击进入后,就可以进行最后的部署了。
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_871541_CGCnbgtV9Uk1Jccy_1685329861?w=1217\&h=628\&type=image/png)
## 部署模型
进入资源页面后,在部署模型前,可以先点击“开发”,把密钥和终结点记下来。
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_852567_dxCZOrkMlWDSLH0d_1685330736?w=856\&h=568\&type=image/png)
之后,就可以去部署模型了,点击“部署”即可,会跳转到 Azure OpenAI Stuido 进行下面的操作:
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_169225_uWs1gMhpNbnwW4h2_1685329901?w=1865\&h=969\&type=image/png)
进入 Azure OpenAi Studio 后,点击新建部署,会弹出如下对话框:
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_391255_iXUSZAzoud5qlxjJ_1685330224?w=656\&h=641\&type=image/png)
在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。
![](https://wdcdn.qpic.cn/MTY4ODg1Mjk4NzI5NTU1NQ_724099_vBaHcUilsm1EtPgK_1685330396?w=1869\&h=482\&type=image/png)
这个部署名需要记下来。
到现在为止,申请操作就完成了,需要记下来的有下面几个东西:
 密钥1或2都可以
● 终结点
● 部署名(不是模型名)
# 修改 config.py
```
AZURE_ENDPOINT = "填入终结点"
AZURE_API_KEY = "填入azure openai api的密钥"
AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改
AZURE_ENGINE = "填入部署名"
```
# API的使用
接下来就是具体怎么使用API了还是可以参考官方文档[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
和openai自己的api调用有点类似都需要安装openai库不同的是调用方式
```
import openai
openai.api_type = "azure" #固定格式,无需修改
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点”
openai.api_version = "2023-05-15" #固定格式,无需修改
openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2”
response = openai.ChatCompletion.create(
engine="gpt-35-turbo", #这里填入的不是模型名,是部署名
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
{"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
{"role": "user", "content": "Do other Azure Cognitive Services support this too?"}
]
)
print(response)
print(response['choices'][0]['message']['content'])
```
需要注意的是:
1.  engine那里填入的是部署名不是模型名
2.  通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同,不需要 decode已经是解析好的 json 了,直接根据键值读取即可。
更细节的使用方法详见官方API文档。
# 关于费用
Azure OpenAI API 还是需要一些费用的免费订阅只有1个月有效期费用如下
![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44)
具体可以可以看这个网址 [Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable)
并非网上说的什么“一年白嫖”但注册方法以及网络问题都比直接使用openai的api要简单一些。

View File

@ -1,10 +1,8 @@
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
def main():
import subprocess, sys
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
import gradio as gr
if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "用 pip install -r requirements.txt 安装依赖"
if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖详情信息见requirements.txt"
from request_llm.bridge_all import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
@ -56,7 +54,6 @@ def main():
cancel_handles = []
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
gr.HTML(title_html)
gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”Duplicate Space按钮<font color="#FF00FF">使用时先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”Duplicate Space之前填入API_KEY或进行提问否则您的API_KEY将极可能被空间所有者攫取<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr_L1():
with gr_L2(scale=2):
@ -66,7 +63,7 @@ def main():
with gr_L2(scale=1):
with gr.Accordion("输入区", open=True) as area_input_primary:
with gr.Row():
txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥输入多个密钥时用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
with gr.Row():
submitBtn = gr.Button("提交", variant="primary")
with gr.Row():
@ -200,7 +197,10 @@ def main():
threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
auto_opentab_delay()
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
server_name="0.0.0.0", server_port=PORT,
favicon_path="docs/logo.png", auth=AUTHENTICATION,
blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
# 如果需要在二级路径下运行
# CUSTOM_PATH, = get_conf('CUSTOM_PATH')

View File

@ -16,9 +16,6 @@ from toolbox import get_conf, trimmed_format_exc
from .bridge_chatgpt import predict_no_ui_long_connection as chatgpt_noui
from .bridge_chatgpt import predict as chatgpt_ui
from .bridge_azure_test import predict_no_ui_long_connection as azure_noui
from .bridge_azure_test import predict as azure_ui
from .bridge_chatglm import predict_no_ui_long_connection as chatglm_noui
from .bridge_chatglm import predict as chatglm_ui
@ -96,24 +93,6 @@ model_info = {
"token_cnt": get_token_num_gpt35,
},
"gpt-3.5-turbo-0613": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"gpt-3.5-turbo-16k-0613": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
"endpoint": openai_endpoint,
"max_token": 1024 * 16,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"gpt-4": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
@ -123,16 +102,6 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
# azure openai
"azure-gpt35":{
"fn_with_ui": azure_ui,
"fn_without_ui": azure_noui,
"endpoint": get_conf("AZURE_ENDPOINT"),
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
# api_2d
"api2d-gpt-3.5-turbo": {
"fn_with_ui": chatgpt_ui,
@ -152,7 +121,7 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
# chatglm 直接对齐到 chatglm2
# chatglm
"chatglm": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
@ -161,15 +130,6 @@ model_info = {
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"chatglm2": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
"endpoint": None,
"max_token": 1024,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
# newbing
"newbing": {
"fn_with_ui": newbing_ui,

View File

@ -1,241 +0,0 @@
"""
该文件中主要包含三个函数
不具备多线程能力的函数:
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui高级实验性功能模块调用不会实时显示在界面上参数简单可以多线程并行方便实现复杂的功能逻辑
3. predict_no_ui_long_connection在实验过程中发现调用predict_no_ui处理长文档时和openai的连接容易断掉这个函数用stream的方式解决这个问题同样支持多线程
"""
import logging
import traceback
import importlib
import openai
import time
# 读取config.py文件中关于AZURE OPENAI API的信息
from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc
TIMEOUT_SECONDS, MAX_RETRY, AZURE_ENGINE, AZURE_ENDPOINT, AZURE_API_VERSION, AZURE_API_KEY = \
get_conf('TIMEOUT_SECONDS', 'MAX_RETRY',"AZURE_ENGINE","AZURE_ENDPOINT", "AZURE_API_VERSION", "AZURE_API_KEY")
def get_full_error(chunk, stream_response):
"""
获取完整的从Openai返回的报错
"""
while True:
try:
chunk += next(stream_response)
except:
break
return chunk
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
"""
发送至azure openai api流式获取输出。
用于基础的对话功能。
inputs 是本次问询的输入
top_p, temperature是chatGPT的内部调优参数
history 是之前的对话列表注意无论是inputs还是history内容太长了都会触发token数量溢出的错误
chatbot 为WebUI中显示的对话列表修改它然后yeild出去可以直接修改对话界面内容
additional_fn代表点击的哪个按钮按钮见functional.py
"""
print(llm_kwargs["llm_model"])
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
raw_input = inputs
logging.info(f'[raw_input] {raw_input}')
chatbot.append((inputs, ""))
yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream)
history.append(inputs); history.append("")
retry = 0
while True:
try:
openai.api_type = "azure"
openai.api_version = AZURE_API_VERSION
openai.api_base = AZURE_ENDPOINT
openai.api_key = AZURE_API_KEY
response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break
except:
retry += 1
chatbot[-1] = ((chatbot[-1][0], "获取response失败重试中。。。"))
retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
if retry > MAX_RETRY: raise TimeoutError
gpt_replying_buffer = ""
is_head_of_the_stream = True
if stream:
stream_response = response
while True:
try:
chunk = next(stream_response)
except StopIteration:
from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk)}")
yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk) # 刷新界面
return
if is_head_of_the_stream and (r'"object":"error"' not in chunk):
# 数据流的第一帧不携带content
is_head_of_the_stream = False; continue
if chunk:
#print(chunk)
try:
if "delta" in chunk["choices"][0]:
if chunk["choices"][0]["finish_reason"] == "stop":
logging.info(f'[response] {gpt_replying_buffer}')
break
status_text = f"finish_reason: {chunk['choices'][0]['finish_reason']}"
gpt_replying_buffer = gpt_replying_buffer + chunk["choices"][0]["delta"]["content"]
history[-1] = gpt_replying_buffer
chatbot[-1] = (history[-2], history[-1])
yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
except Exception as e:
traceback.print_exc()
yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
chunk = get_full_error(chunk, stream_response)
error_msg = chunk
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
return
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
"""
发送至AZURE OPENAI API等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
inputs
是本次问询的输入
sys_prompt:
系统静默prompt
llm_kwargs
chatGPT的内部调优参数
history
是之前的对话列表
observe_window = None
用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
"""
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
retry = 0
while True:
try:
openai.api_type = "azure"
openai.api_version = AZURE_API_VERSION
openai.api_base = AZURE_ENDPOINT
openai.api_key = AZURE_API_KEY
response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break
except:
retry += 1
traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response
result = ''
while True:
try: chunk = next(stream_response)
except StopIteration:
break
except:
chunk = next(stream_response) # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk, stream_response)
if "reduce the length" in error_msg:
raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg)
else:
raise RuntimeError("AZURE OPENAI API拒绝了请求" + error_msg)
if ('data: [DONE]' in chunk): break
delta = chunk["delta"]
if len(delta) == 0: break
if "role" in delta: continue
if "content" in delta:
result += delta["content"]
if not console_slience: print(delta["content"], end='')
if observe_window is not None:
# 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: observe_window[0] += delta["content"]
# 看门狗,如果超过期限没有喂狗,则终止
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。")
else: raise RuntimeError("意外Json结构"+delta)
if chunk['finish_reason'] == 'length':
raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
return result
def generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream):
"""
整合所有信息选择LLM模型生成 azure openai api请求为发送请求做准备
"""
conversation_cnt = len(history) // 2
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "": continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now)
payload = {
"model": llm_kwargs['llm_model'],
"messages": messages,
"temperature": llm_kwargs['temperature'], # 1.0,
"top_p": llm_kwargs['top_p'], # 1.0,
"n": 1,
"stream": stream,
"presence_penalty": 0,
"frequency_penalty": 0,
"engine": AZURE_ENGINE
}
try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except:
print('输入中可能存在乱码。')
return payload

View File

@ -40,12 +40,12 @@ class GetGLMHandle(Process):
while True:
try:
if self.chatglm_model is None:
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
device, = get_conf('LOCAL_MODEL_DEVICE')
if device=='cpu':
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
else:
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = self.chatglm_model.eval()
break
else:

View File

@ -1,3 +1,4 @@
./docs/gradio-3.32.2-py3-none-any.whl
tiktoken>=0.3.3
requests[socks]
transformers
@ -14,4 +15,4 @@ pymupdf
openai
numpy
arxiv
rich
rich

View File

@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
else:
report_files = find_recent_files('gpt_log')
if len(report_files) == 0:
return cookies, None, chatbot
return None, chatbot
# files.extend(report_files)
file_links = ''
for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
@ -842,4 +842,4 @@ def objload(file='objdump.tmp'):
return
with open(file, 'rb') as f:
return pickle.load(f)

View File

@ -1,5 +1,5 @@
{
"version": 3.42,
"version": 3.41,
"show_feature": true,
"new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
"new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
}