Compare commits


202 Commits

Author SHA1 Message Date
6df33c95d4 Merge pull request #841 from KelvinF97/master
Optimize some code and fix some bugs
2023-07-01 22:31:28 +08:00
59877dd728 Local variable 'result' might be referenced before assignment; add an else branch that assigns result 2023-07-01 22:27:11 +08:00
a4f187b8dc Merge branch 'master' into master 2023-07-01 22:19:53 +08:00
5f7ffef238 Add empty-input checks to the basic functions 2023-07-01 22:04:42 +08:00
41c10f5688 report image generation error in UI 2023-07-01 02:28:32 +08:00
d7ac99f603 Correct error messages 2023-07-01 01:46:43 +08:00
1616daae6a Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master 2023-07-01 00:17:30 +08:00
a1092d8f92 Add an option to automatically clear the input box 2023-07-01 00:17:26 +08:00
34ca9f138f Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-30 14:56:28 +08:00
df3f1aa3ca Correct the default token count for ChatGLM2 2023-06-30 14:56:22 +08:00
bf805cf477 Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master 2023-06-30 13:09:51 +08:00
ecb08e69be remove find picture core functionality 2023-06-30 13:08:54 +08:00
28c1e3f11b Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-30 12:06:33 +08:00
403667aec1 upgrade chatglm to chatglm2 2023-06-30 12:06:28 +08:00
22f377e2fb fix multi user cwd shift 2023-06-30 11:05:47 +08:00
37172906ef Fix a bug in file export 2023-06-29 14:55:55 +08:00
3b78e0538b Fix the image display issue in the plugin demo 2023-06-29 14:52:58 +08:00
d8f9ac71d0 Merge pull request #907 from Xminry/master
feat: web search feature (cn.bing.com version, usable from mainland China)
2023-06-29 12:44:32 +08:00
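A minimal sketch of what a cn.bing.com-based search could look like, assuming requests and BeautifulSoup are used; the query parameters and the li.b_algo selector are illustrative assumptions, not the plugin's actual code:

import requests
from bs4 import BeautifulSoup

def bing_search(query, proxies=None, max_results=5):
    # Fetch a cn.bing.com result page and collect titles and links (illustrative only).
    resp = requests.get("https://cn.bing.com/search", params={"q": query},
                        headers={"User-Agent": "Mozilla/5.0"}, proxies=proxies, timeout=30)
    soup = BeautifulSoup(resp.text, "html.parser")
    results = []
    for item in soup.select("li.b_algo")[:max_results]:
        link = item.find("a")
        if link and link.get("href"):
            results.append({"title": link.get_text(strip=True), "url": link["href"]})
    return results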
aced272d3c Fine-tune plugin prompts 2023-06-29 12:43:50 +08:00
aff77a086d Merge branch 'master' of https://github.com/Xminry/gpt_academic into Xminry-master 2023-06-29 12:38:43 +08:00
49253c4dc6 [arxiv trans] add html comparison to zip file 2023-06-29 12:29:49 +08:00
1a00093015 Fix prompts 2023-06-29 12:15:52 +08:00
64f76e7401 3.42 2023-06-29 11:32:19 +08:00
eb4c07997e Fix issues with LaTeX error correction and local LaTeX paper translation 2023-06-29 11:30:42 +08:00
99cf7205c3 feat: web search feature (cn.bing.com version, usable from mainland China) 2023-06-28 10:30:08 +08:00
d684b4cdb3 Merge pull request #905 from Xminry/master
Update 理解PDF文档内容.py
2023-06-27 23:37:25 +08:00
601a95c948 Merge pull request #881 from OverKit/master
update latex_utils.py
2023-06-27 19:20:17 +08:00
e18bef2e9c add item breaker 2023-06-27 19:16:05 +08:00
f654c1af31 merge regex expressions 2023-06-27 18:59:56 +08:00
e90048a671 Merge branch 'master' of https://github.com/OverKit/gpt_academic into OverKit-master 2023-06-27 16:14:12 +08:00
ea624b1510 Merge pull request #889 from dackdawn/master
Declare the 0613 models
2023-06-27 15:03:15 +08:00
057e3dda3c Merge branch 'master' of https://github.com/dackdawn/gpt_academic into dackdawn-master 2023-06-27 15:02:22 +08:00
4290821a50 Update 理解PDF文档内容.py 2023-06-27 01:57:31 +08:00
280e14d7b7 Update docker-compose for the LaTeX module 2023-06-26 09:59:14 +08:00
9f0cf9fb2b arxiv PDF citations 2023-06-25 23:30:31 +08:00
b8560b7510 Fix a bug that misidentified LaTeX template files 2023-06-25 22:46:16 +08:00
d841d13b04 add arxiv translation test samples 2023-06-25 22:12:44 +08:00
efda9e5193 Merge pull request #897 from Ranhuiryan/master
Add the azure-gpt35 option
2023-06-24 17:59:51 +10:00
33d2e75aac add azure-gpt35 to model list 2023-06-21 16:19:49 +08:00
74941170aa update azure use instruction 2023-06-21 16:19:26 +08:00
cd38949903 Roll back to the original text when an error occurs 2023-06-21 11:53:57 +10:00
d87f1eb171 Update instructions for connecting to Azure 2023-06-21 11:38:59 +10:00
cd1e4e1ba7 Merge pull request #797 from XiaojianTang/master
Add support for the Azure OpenAI API
2023-06-21 11:23:41 +10:00
cf5f348d70 update test samples 2023-06-21 11:20:31 +10:00
0ee25f475e Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-20 23:07:51 +08:00
1fede6df7f temp 2023-06-20 23:05:17 +08:00
22a65cd163 Create build-with-latex.yml 2023-06-21 00:55:24 +10:00
538b041ea3 Merge pull request #890 from Mcskiller/master
Update README.md
2023-06-21 00:53:26 +10:00
d7b056576d add latex docker-compose 2023-06-21 00:52:58 +10:00
cb0bb6ab4a fix minor bugs 2023-06-21 00:41:33 +10:00
bf955aaf12 fix bugs 2023-06-20 23:12:30 +10:00
61eb0da861 fix encoding bug 2023-06-20 22:08:09 +10:00
5da633d94d Update README.md
Fix the error URL for the git clone.
2023-06-20 19:10:11 +08:00
f3e4e26e2f Declare the 0613 models
OpenAI's RPM limit for gpt-3.5-turbo is 3, while gpt-3.5-turbo-0613 allows 60 RPM; the two models behave the same, but choosing the specific model grants a higher RPM and TPM quota
2023-06-19 21:40:26 +08:00
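If the goal is only to use the higher-RPM variant, the change boils down to declaring the dated model in the configuration. A hedged sketch, using the option names visible in the config.py diff further below:

# config.py (sketch): the dated 0613 models behave like their base models but have separate, higher RPM/TPM quotas.
LLM_MODEL = "gpt-3.5-turbo-0613"
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-4"]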
af7734dd35 avoid file fusion 2023-06-19 16:57:11 +10:00
d5bab093f9 rename function names 2023-06-19 15:17:33 +10:00
f94b167dc2 Merge branch 'master' into overkit-master 2023-06-19 14:53:51 +10:00
951d5ec758 Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-19 14:52:25 +10:00
016d8ee156 Merge remote-tracking branch 'origin/master' into OverKit-master 2023-06-19 14:51:59 +10:00
dca9ec4bae Merge branch 'master' of https://github.com/OverKit/gpt_academic into OverKit-master 2023-06-19 14:49:50 +10:00
a06e43c96b Update README.md 2023-06-18 16:15:37 +08:00
29c6bfb6cb Update README.md 2023-06-18 16:12:06 +08:00
8d7ee975a0 Update README.md 2023-06-18 16:10:45 +08:00
4bafbb3562 Update Latex输出PDF结果.py 2023-06-18 15:54:23 +08:00
7fdf0a8e51 Adjust the code that differentiates content 2023-06-18 15:51:29 +08:00
2bb13b4677 Update README.md 2023-06-18 15:44:42 +08:00
9a5a509dd9 Fix the abstract search 2023-06-17 19:27:21 +08:00
cbcb98ef6a Merge pull request #872 from Skyzayre/master
Update README.md
2023-06-16 17:54:39 +08:00
bb864c6313 Add some prompt text 2023-06-16 17:33:19 +08:00
6d849eeb12 Fix a bug in the Langchain plugin 2023-06-16 17:33:03 +08:00
ef752838b0 Update README.md 2023-06-15 02:07:43 +08:00
73d4a1ff4b Update README.md 2023-06-14 10:15:47 +08:00
8c62f21aa6 3.41: add support for gpt-3.5-16k 2023-06-14 09:57:09 +08:00
c40ebfc21f Add gpt-3.5-16k to the supported model list 2023-06-14 09:50:15 +08:00
c365ea9f57 Update README.md 2023-06-13 16:13:19 +08:00
12d66777cc Merge pull request #864 from OverKit/master
check for the % character after stripping leading spaces or tabs
2023-06-12 15:21:35 +08:00
9ac3d0d65d check for the % character after stripping leading spaces or tabs 2023-06-12 10:09:52 +08:00
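A small sketch of the rule being described, assuming the intent is to treat a line as a LaTeX comment only when % is the first non-blank character (the exact regex in latex_utils.py may differ):

import re

def is_comment_line(line):
    # A LaTeX comment line: '%' appears right after any leading spaces or tabs.
    return re.match(r'[ \t]*%', line) is not None

assert is_comment_line("   % remove me before compiling")
assert not is_comment_line(r"accuracy of 95\% on the test set")  # escaped % inside text is not a comment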
9fd212652e Declare domain-specific terminology 2023-06-12 09:45:59 +08:00
790a1cf12a Add some prompts 2023-06-11 20:12:25 +08:00
3ecf2977a8 Fix caption translation 2023-06-11 18:23:54 +08:00
aeddf6b461 Update Latex输出PDF结果.py 2023-06-11 10:20:49 +08:00
ce0d8b9dab Prototype of the Void Terminal plugin 2023-06-11 01:36:23 +08:00
3c00e7a143 file link in chatbot 2023-06-10 21:45:38 +08:00
ef1bfdd60f update pip install notice 2023-06-08 21:29:10 +08:00
e48d92e82e update translation 2023-06-08 18:34:06 +08:00
110510997f Update README.md 2023-06-08 12:48:52 +08:00
b52695845e Update README.md 2023-06-08 12:44:05 +08:00
f30c9c6d3b Update README.md 2023-06-08 12:43:13 +08:00
ff5403eac6 Update README.md 2023-06-08 12:42:24 +08:00
f9226d92be Update version 2023-06-08 12:24:14 +08:00
a0ea5d0e9e Update README.md 2023-06-08 12:22:03 +08:00
ce6f11d200 Update README.md 2023-06-08 12:20:49 +08:00
10b3001dba Update README.md 2023-06-08 12:19:11 +08:00
e2de1d76ea Update README.md 2023-06-08 12:18:31 +08:00
77cc141a82 Update README.md 2023-06-08 12:14:02 +08:00
526b4d8ecd Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-07 11:09:20 +08:00
149db621ec langchain check depends 2023-06-07 11:09:12 +08:00
2e1bb7311c Merge pull request #848 from MengDanzz/master
Split the Dockerfile COPY into two stages to cache dependency libraries, so rebuilds do not need to reinstall them
2023-06-07 10:44:09 +08:00
dae65fd2c2 Run pip install once more after COPY .. to check for dependency changes 2023-06-07 10:43:45 +08:00
9aafb2ee47 Include non-PyPI packages in the COPY 2023-06-07 09:18:57 +08:00
6bc91bd02e Merge branch 'binary-husky:master' into master 2023-06-07 09:15:44 +08:00
8ef7344101 fix subprocess bug in Windows 2023-06-06 18:57:52 +08:00
40da1b0afe Run the LaTeX decomposition program in a subprocess 2023-06-06 18:44:00 +08:00
2da36c7667 Optimize the code: requests.exceptions.ConnectionError should be caught around the POST request rather than while reading from the iterator; if the POST request fails, execution never reaches the iterator step. 2023-06-06 16:10:54 +08:00
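The idea, as a hedged sketch rather than the project's exact code: catch connection failures where the POST is issued, because a failed POST never reaches the line iterator at all.

import requests

def open_stream(endpoint, headers, payload, proxies=None, timeout=30):
    try:
        # ConnectionError / ReadTimeout can only originate here, at the POST itself.
        response = requests.post(endpoint, headers=headers, json=payload,
                                 proxies=proxies, stream=True, timeout=timeout)
    except (requests.exceptions.ConnectionError, requests.exceptions.ReadTimeout) as e:
        raise TimeoutError(f"POST request failed before any streaming began: {e}")
    # Only a successful POST ever hands back an iterator to read from.
    return response.iter_lines()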
c65def90f3 Split the Dockerfile COPY into two stages to cache dependency libraries, so rebuilds do not need to reinstall them 2023-06-06 14:36:30 +08:00
0e1de5a184 Optimize the code for readability, catch other exceptions, and avoid exceptions from reading a response with no content 2023-06-06 10:57:52 +08:00
344579fa79 Every time the function is called without the list parameter explicitly passed, the same default list is reused; the list object is shared across calls, so its contents accumulate. 2023-06-06 10:31:28 +08:00
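A minimal illustration of the pitfall and the usual fix (hypothetical functions, not code from this repository):

def append_bad(item, history=[]):      # the default list is created once and shared by every call
    history.append(item)
    return history

def append_good(item, history=None):   # use None as a sentinel and build a fresh list per call
    if history is None:
        history = []
    history.append(item)
    return history

print(append_bad("a"), append_bad("b"))    # ['a', 'b'] ['a', 'b'] -- state leaks across calls
print(append_good("a"), append_good("b"))  # ['a'] ['b']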
6d7ee17dbd Add zh_langchain to the dependency files 2023-06-06 09:37:04 +08:00
0a83ba91e9 Add langchain; for safety, avoid prebuilt installation packages from unknown sources: change docs/gradio-3.32.2-py3-none-any.whl to gradio>=3.33.1 2023-06-06 09:22:50 +08:00
ffd7363c4c When more detailed and comprehensive exception information is needed, it is usually recommended to use the exception object e; stack-trace information can serve as a supplement for understanding the context and call chain of the exception 2023-06-06 09:13:13 +08:00
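A hedged sketch of that reporting pattern: keep the exception object for the concrete error and the formatted traceback for the call context.

import traceback

def risky():
    return 1 / 0

try:
    risky()
except Exception as e:
    print(f"Exception occurred: {e}")   # the concrete error
    print(traceback.format_exc())       # the call chain that led to it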
b538d31b13 str is a built-in type and should not be used as a variable name (it shadows the builtin) 2023-06-06 09:07:53 +08:00
543a8b98e9 Local variable 'result' might be referenced before assignment; add an else branch that assigns result 2023-06-06 08:41:55 +08:00
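In miniature, the warning and its fix (hypothetical example; the real change is the added else branch in the check_proxy diff shown below):

def describe(data):
    if 'country_name' in data:
        result = f"located in {data['country_name']}"
    elif 'error' in data:
        result = "location unknown (query rate limited)"
    else:
        # Without this branch, 'result' may be referenced before assignment on the return line.
        result = f"could not parse response: {data}"
    return result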
55c6e9c59a Specify the proxy parameter type and use the dict get method to read it, avoiding exceptions. Change the timeout to 30 seconds to tolerate network fluctuations. Capture the exception details and show them in the front end for easier troubleshooting 2023-06-06 08:37:37 +08:00
0fc8f740d0 Fix PEP 8: E302 expected 2 blank lines, found 1 & PEP 8: E303 too many blank lines (4) 2023-06-06 08:29:26 +08:00
a019a64e65 PEP 8: E302 expected 2 blank lines, found 1 2023-06-06 08:26:54 +08:00
a75ae327e7 Make the code PEP 8 compliant; fix PEP 8: E401 multiple imports on one line and PEP 8: E701 multiple statements on one line (colon) 2023-06-06 08:23:56 +08:00
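For reference, a small before/after of the two findings, on illustrative lines rather than code from the repository:

# Before (E401: multiple imports on one line; E701: multiple statements on one line):
#   import os, sys
#   if len(sys.argv) > 1: print(sys.argv[1])
# After:
import os
import sys

if len(sys.argv) > 1:
    print(sys.argv[1])
print(os.getcwd())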
3c38fad4aa PEP 8: E251 unexpected spaces around keyword / parameter equals 2023-06-06 08:14:21 +08:00
bf9731e937 Fix the issue of PEP 8: E401 multiple imports on one line 2023-06-06 08:07:50 +08:00
0f6e3e2dbb Fix the issue of ineffective transfer of reference history 2023-06-06 08:06:46 +08:00
ddeaf76422 check latex in PATH 2023-06-06 00:23:00 +08:00
f23b66dec2 update Dockerfile with Latex 2023-06-05 23:49:54 +08:00
a26b294817 Write Some Docstring 2023-06-05 23:44:59 +08:00
66018840da declare resp 2023-06-05 23:24:41 +08:00
cea2144f34 fix test samples 2023-06-05 23:11:21 +08:00
7f5be93c1d Fix some regex-matching bugs 2023-06-05 22:57:39 +08:00
85b838b302 add Linux support 2023-06-04 23:06:35 +08:00
27f97ba92a remove previous results 2023-06-04 16:55:36 +08:00
14269eba98 Set up a local arxiv cache 2023-06-04 16:08:01 +08:00
d5c9bc9f0a Raise the search priority of iffalse 2023-06-04 14:15:59 +08:00
b0fed3edfc consider iffalse state 2023-06-04 14:06:02 +08:00
7296d054a2 patch latex segmentation 2023-06-04 13:56:15 +08:00
d57c7d352d improve quality 2023-06-03 23:54:30 +08:00
3fd2927ea3 Improvements 2023-06-03 23:33:45 +08:00
b745074160 avoid most compile failure 2023-06-03 23:33:32 +08:00
70ee810133 improve success rate 2023-06-03 19:39:19 +08:00
68fea9e79b fix test 2023-06-03 18:09:39 +08:00
f82bf91aa8 test example 2023-06-03 18:06:39 +08:00
dde9edcc0c fix a fatal mistake 2023-06-03 17:49:22 +08:00
66c78e459e Fix prompts 2023-06-03 17:18:38 +08:00
de54102303 Revise reminders 2023-06-03 16:43:26 +08:00
7c7d2d8a84 Patch for LaTeX minipage 2023-06-03 16:16:32 +08:00
834f989ed4 Handle the case where input is given without the .tex extension 2023-06-03 15:42:22 +08:00
b658ee6e04 Fix some issues with arxiv translation 2023-06-03 15:36:55 +08:00
1a60280ea0 Add warnings 2023-06-03 14:40:37 +08:00
991cb7d272 warning 2023-06-03 14:39:40 +08:00
463991cfb2 fix bug 2023-06-03 14:24:06 +08:00
06f10b5fdc fix zh cite bug 2023-06-03 14:17:58 +08:00
d275d012c6 Merge branch 'langchain' into master 2023-06-03 13:53:39 +08:00
c5d1ea3e21 update langchain version 2023-06-03 13:53:34 +08:00
0022b92404 update prompt 2023-06-03 13:50:39 +08:00
ef61221241 latex auto translation milestone 2023-06-03 13:46:40 +08:00
5a1831db98 Success! 2023-06-03 00:34:23 +08:00
a643f8b0db debug translation 2023-06-02 23:06:01 +08:00
601712fd0a latex toolchain 2023-06-02 21:44:11 +08:00
e769f831c7 latex 2023-06-02 14:07:04 +08:00
dcd952671f Update main.py 2023-06-01 15:56:52 +08:00
06564df038 Merge branch 'langchain' 2023-06-01 09:39:34 +08:00
2f037f30d5 Temporarily remove plugin locking 2023-06-01 09:39:00 +08:00
efedab186d Merge branch 'master' into langchain 2023-06-01 00:10:22 +08:00
f49cae5116 Update Langchain知识库.py 2023-06-01 00:09:07 +08:00
2b620ccf2e Update prompts 2023-06-01 00:07:19 +08:00
a1b7a4da56 Update test cases 2023-06-01 00:03:27 +08:00
61b0e49fed fix some bugs in linux 2023-05-31 23:49:25 +08:00
f60dc371db 12 2023-05-31 10:42:44 +08:00
0a3433b8ac Update README.md 2023-05-31 10:37:08 +08:00
31bce54abb Update README.md 2023-05-31 10:34:21 +08:00
5db1530717 Merge branch 'langchain' of github.com:binary-husky/chatgpt_academic into langchain 2023-05-30 20:08:47 +08:00
c32929fd11 Merge branch 'master' into langchain 2023-05-30 20:08:15 +08:00
3e4c2b056c knowledge base 2023-05-30 19:55:38 +08:00
e79e9d7d23 Merge branch 'master' into langchain 2023-05-30 18:31:39 +08:00
d175b93072 Update README.md.Italian.md 2023-05-30 17:27:41 +08:00
ed254687d2 Update README.md.Italian.md 2023-05-30 17:26:12 +08:00
c0392f7074 Update README.md.Korean.md 2023-05-30 17:25:32 +08:00
f437712af7 Update README.md.Portuguese.md 2023-05-30 17:22:46 +08:00
6d1ea643e9 langchain 2023-05-30 12:54:42 +08:00
9e84cfcd46 Update README.md 2023-05-29 19:48:34 +08:00
897695d29f Fix file blocking under secondary paths 2023-05-28 20:25:35 +08:00
1dcc2873d2 Fix the Gradio configuration leak 2023-05-28 20:23:47 +08:00
42cf738a31 Fix the copy button not working in some cases 2023-05-28 18:12:48 +08:00
e4646789af Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-05-28 16:07:29 +08:00
e6c3aabd45 docker-compose check 2023-05-28 16:07:24 +08:00
6789d1fab4 Update README.md 2023-05-28 11:21:50 +08:00
7a733f00a2 Update README.md 2023-05-28 00:19:23 +08:00
dd55888f0e Update README.md 2023-05-28 00:16:45 +08:00
0327df22eb Update README.md 2023-05-28 00:14:54 +08:00
e544f5e9d0 Update README.md 2023-05-27 23:45:15 +08:00
0fad4f44a4 fix dockerfile 2023-05-27 23:36:42 +08:00
1240dd6f26 local gradio 2023-05-27 23:29:22 +08:00
d6be947177 Fix the gradio dependency installation issue 2023-05-27 23:10:44 +08:00
3cfbdce9f2 remove limitation for now 2023-05-27 22:25:50 +08:00
1ee471ff57 fix reminder 2023-05-27 22:20:46 +08:00
25ccecf8e3 Update README.md 2023-05-27 21:56:43 +08:00
9e991bfa3e Update requirements.txt 2023-05-27 21:56:16 +08:00
221efd0193 Update README.md 2023-05-27 21:11:25 +08:00
976b9bf65f Update README.md 2023-05-27 21:04:52 +08:00
ae5783e383 Fix the gradio copy button bug 2023-05-27 20:20:45 +08:00
30224af042 Merge pull request #798 from Bit0r/master
🐛 Regex for matching LaTeX comments
2023-05-27 14:03:07 +08:00
8ff7c15cd8 🐛 Regex for matching LaTeX comments 2023-05-27 11:19:48 +08:00
f3205994ea Add support for the Azure OpenAI API 2023-05-26 23:22:12 +08:00
ec8cc48a4d Add ProxyNetworkActivate 2023-05-25 23:48:18 +08:00
5d75c578b9 fix dependency 2023-05-25 15:28:27 +08:00
cd411c2eea newbing-free deps 2023-05-25 15:12:54 +08:00
18 changed files with 308 additions and 216 deletions

View File

@ -1,15 +1,3 @@
---
title: ChatImprovement
emoji: 😻
colorFrom: blue
colorTo: blue
sdk: gradio
sdk_version: 3.32.0
app_file: app.py
pinned: false
---
# ChatGPT 学术优化
> **Note**
>
> 2023.5.27 对Gradio依赖进行了调整Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时请严格选择`requirements.txt`中**指定的版本**

View File

@ -1,10 +1,10 @@
def check_proxy(proxies):
def check_proxy(proxies: dict):
import requests
proxies_https = proxies['https'] if proxies is not None else ''
proxies_https = proxies.get('https') if proxies is not None else ''
try:
response = requests.get("https://ipapi.co/json/",
proxies=proxies, timeout=4)
proxies=proxies, timeout=30)
data = response.json()
print(f'查询代理的地理位置,返回的结果是{data}')
if 'country_name' in data:
@ -12,10 +12,12 @@ def check_proxy(proxies):
result = f"代理配置 {proxies_https}, 代理所在地:{country}"
elif 'error' in data:
result = f"代理配置 {proxies_https}, 代理所在地未知IP查询频率受限"
else:
result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
print(result)
return result
except:
result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
except Exception as e:
result = f"代理 {proxies_https} 查询出现异常: {e},代理可能无效"
print(result)
return result
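For orientation, a hedged example of calling the revised function; the proxy address is a placeholder, not a recommended setting:

from check_proxy import check_proxy

proxies = {"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
print(check_proxy(proxies))  # prints and returns a one-line description of the proxy's location
print(check_proxy(None))     # with the .get()-based lookup, a missing proxy config degrades gracefully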

View File

@ -45,9 +45,10 @@ WEB_PORT = -1
# 如果OpenAI不响应网络卡顿、代理失败、KEY失效重试的次数限制
MAX_RETRY = 2
# OpenAI模型选择是gpt4现在只对申请成功的人开放
LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
@ -55,6 +56,9 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
# 设置gradio的并行线程数不需要修改
CONCURRENT_COUNT = 100
# 是否在提交时自动清空输入框
AUTO_CLEAR_TXT = False
# 加一个live2d装饰
ADD_WAIFU = False
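A brief sketch of how these options are read elsewhere, following the get_conf pattern visible in the main.py diff below (illustrative only):

from toolbox import get_conf

# get_conf returns the requested values in order; unpack the ones you need.
AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = get_conf('AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
if AUTO_CLEAR_TXT:
    print("The input box will be cleared automatically on submit.")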

View File

@ -63,6 +63,7 @@ def get_core_functions():
"Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL" +
r"然后请使用Markdown格式封装并且不要有反斜线不要用代码块。现在请按以下描述给我发送图片" + "\n\n",
"Suffix": r"",
"Visible": False,
},
"解释代码": {
"Prefix": r"请解释以下代码:" + "\n```\n",
@ -73,6 +74,5 @@ def get_core_functions():
r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
r"Items need to be transformed:",
"Suffix": r"",
"Visible": False,
}
}

View File

@ -193,8 +193,9 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559"
# txt = r"https://arxiv.org/abs/2303.08774"
txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

View File

@ -1,16 +1,19 @@
from toolbox import update_ui, get_conf, trimmed_format_exc
import threading
def input_clipping(inputs, history, max_token_limit):
import numpy as np
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
def get_token_num(txt):
return len(enc.encode(txt, disallowed_special=()))
mode = 'input-and-history'
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
input_token_num = get_token_num(inputs)
if input_token_num < max_token_limit//2:
if input_token_num < max_token_limit // 2:
mode = 'only-history'
max_token_limit = max_token_limit - input_token_num
@ -18,13 +21,13 @@ def input_clipping(inputs, history, max_token_limit):
everything.extend(history)
n_token = get_token_num('\n'.join(everything))
everything_token = [get_token_num(e) for e in everything]
delta = max(everything_token) // 16 # 截断时的颗粒度
delta = max(everything_token) // 16 # 截断时的颗粒度
while n_token > max_token_limit:
where = np.argmax(everything_token)
encoded = enc.encode(everything[where], disallowed_special=())
clipped_encoded = encoded[:len(encoded)-delta]
everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
clipped_encoded = encoded[:len(encoded) - delta]
everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
everything_token[where] = get_token_num(everything[where])
n_token = get_token_num('\n'.join(everything))
@ -35,12 +38,13 @@ def input_clipping(inputs, history, max_token_limit):
history = everything[1:]
return inputs, history
def request_gpt_model_in_new_thread_with_ui_alive(
inputs, inputs_show_user, llm_kwargs,
inputs, inputs_show_user, llm_kwargs,
chatbot, history, sys_prompt, refresh_interval=0.2,
handle_token_exceed=True,
handle_token_exceed=True,
retry_times_at_unknown_error=2,
):
):
"""
Request GPT model请求GPT模型同时维持用户界面活跃。
@ -64,15 +68,16 @@ def request_gpt_model_in_new_thread_with_ui_alive(
from request_llm.bridge_all import predict_no_ui_long_connection
# 用户反馈
chatbot.append([inputs_show_user, ""])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
executor = ThreadPoolExecutor(max_workers=16)
mutable = ["", time.time(), ""]
def _req_gpt(inputs, history, sys_prompt):
retry_op = retry_times_at_unknown_error
exceeded_cnt = 0
while True:
# watchdog error
if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
if len(mutable) >= 2 and (time.time() - mutable[1]) > 5:
raise RuntimeError("检测到程序终止。")
try:
# 【第一种情况】:顺利完成
@ -89,14 +94,14 @@ def request_gpt_model_in_new_thread_with_ui_alive(
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
MAX_TOKEN = 4096
EXCEED_ALLO = 512 + 512 * exceeded_cnt
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
mutable[0] += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
continue # 返回重试
continue # 返回重试
else:
# 【选择放弃】
tb_str = '```\n' + trimmed_format_exc() + '```'
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
return mutable[0] # 放弃
return mutable[0] # 放弃
except:
# 【第三种情况】:其他错误:重试几次
tb_str = '```\n' + trimmed_format_exc() + '```'
@ -104,14 +109,15 @@ def request_gpt_model_in_new_thread_with_ui_alive(
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if retry_op > 0:
retry_op -= 1
mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}\n\n"
mutable[
0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}\n\n"
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
time.sleep(30)
time.sleep(5)
continue # 返回重试
continue # 返回重试
else:
time.sleep(5)
return mutable[0] # 放弃
return mutable[0] # 放弃
# 提交任务
future = executor.submit(_req_gpt, inputs, history, sys_prompt)
@ -123,21 +129,21 @@ def request_gpt_model_in_new_thread_with_ui_alive(
if future.done():
break
chatbot[-1] = [chatbot[-1][0], mutable[0]]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
final_result = future.result()
chatbot[-1] = [chatbot[-1][0], final_result]
yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息
yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息
return final_result
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array, inputs_show_user_array, llm_kwargs,
chatbot, history_array, sys_prompt_array,
inputs_array, inputs_show_user_array, llm_kwargs,
chatbot, history_array, sys_prompt_array,
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
handle_token_exceed=True, show_user_at_complete=False,
retry_times_at_unknown_error=2,
):
):
"""
Request GPT model using multiple threads with UI and high efficiency
请求GPT模型的[多线程]版。
@ -170,19 +176,21 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
from request_llm.bridge_all import predict_no_ui_long_connection
assert len(inputs_array) == len(history_array)
assert len(inputs_array) == len(sys_prompt_array)
if max_workers == -1: # 读取配置文件
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
except: max_workers = 8
if max_workers == -1: # 读取配置文件
try:
max_workers, = get_conf('DEFAULT_WORKER_NUM')
except:
max_workers = 8
if max_workers <= 0: max_workers = 3
# 屏蔽掉 chatglm的多线程可能会导致严重卡顿
if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
max_workers = 1
executor = ThreadPoolExecutor(max_workers=max_workers)
n_frag = len(inputs_array)
# 用户反馈
chatbot.append(["请开始多线程操作。", ""])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
# 跨线程传递
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
@ -194,13 +202,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
mutable[index][2] = "执行中"
while True:
# watchdog error
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
if len(mutable[index]) >= 2 and (time.time() - mutable[index][1]) > 5:
raise RuntimeError("检测到程序终止。")
try:
# 【第一种情况】:顺利完成
# time.sleep(10); raise RuntimeError("测试")
gpt_say = predict_no_ui_long_connection(
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
)
mutable[index][2] = "已成功"
@ -214,24 +222,26 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
MAX_TOKEN = 4096
EXCEED_ALLO = 512 + 512 * exceeded_cnt
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
gpt_say += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
mutable[index][2] = f"截断重试"
continue # 返回重试
continue # 返回重试
else:
# 【选择放弃】
tb_str = '```\n' + trimmed_format_exc() + '```'
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
mutable[index][2] = "输入过长已放弃"
return gpt_say # 放弃
except:
return gpt_say # 放弃
except Exception as e:
# 【第三种情况】:其他错误
tb_str = '```\n' + trimmed_format_exc() + '```'
print(tb_str)
print(f"发生异常:{e}, 调用栈信息:{tb_str}")
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if retry_op > 0:
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if retry_op > 0:
retry_op -= 1
wait = random.randint(5, 20)
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
@ -241,19 +251,22 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
fail_info = ""
# 也许等待十几秒后,情况会好转
for i in range(wait):
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
mutable[index][2] = f"{fail_info}等待重试 {wait - i}";
time.sleep(1)
# 开始重试
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试
mutable[index][
2] = f"重试中 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试
else:
mutable[index][2] = "已失败"
wait = 5
time.sleep(5)
return gpt_say # 放弃
return gpt_say # 放弃
# 异步任务开始
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in
zip(
range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
cnt = 0
while True:
# yield一次以刷新前端页面
@ -267,17 +280,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
mutable[thread_index][1] = time.time()
# 在前端打印些好玩的东西
for thread_index, _ in enumerate(worker_done):
print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
print_something_really_funny = "[ ...`" + mutable[thread_index][0][-scroller_max_len:]. \
replace('\n', '').replace('```', '...').replace(
' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
' ', '.').replace('<br/>', '.....').replace('$', '.') + "`... ]"
observe_win.append(print_something_really_funny)
# 在前端打印些好玩的东西
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
if not done else f'`{mutable[thread_index][2]}`\n\n'
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
if not done else f'`{mutable[thread_index][2]}`\n\n'
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
# 在前端打印些好玩的东西
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.'] * (cnt % 10 + 1))]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
if all(worker_done):
executor.shutdown()
break
@ -287,13 +300,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
for inputs_show_user, f in zip(inputs_show_user_array, futures):
gpt_res = f.result()
gpt_response_collection.extend([inputs_show_user, gpt_res])
# 是否在结束时,在界面上显示结果
if show_user_at_complete:
for inputs_show_user, f in zip(inputs_show_user_array, futures):
gpt_res = f.result()
chatbot.append([inputs_show_user, gpt_res])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
time.sleep(0.3)
return gpt_response_collection
@ -306,6 +319,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
lines = txt_tocut.split('\n')
estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
estimated_line_cut = int(estimated_line_cut)
cnt = 0
for cnt in reversed(range(estimated_line_cut)):
if must_break_at_empty_line:
if lines[cnt] != "":
@ -322,6 +336,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
result = [prev]
result.extend(cut(post, must_break_at_empty_line))
return result
try:
return cut(txt, must_break_at_empty_line=True)
except RuntimeError:
@ -337,9 +352,10 @@ def force_breakdown(txt, limit, get_token_fn):
return txt[:i], txt[i:]
return "Tiktoken未知错误", "Tiktoken未知错误"
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
# 递归
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
if get_token_fn(txt_tocut) <= limit:
return [txt_tocut]
else:
@ -365,6 +381,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
result = [prev]
result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
return result
try:
# 第1次尝试将双空行\n\n作为切分点
return cut(txt, must_break_at_empty_line=True)
@ -375,7 +392,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
except RuntimeError:
try:
# 第3次尝试将英文句号.)作为切分点
res = cut(txt.replace('.', '\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
res = cut(txt.replace('.', '\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
return [r.replace('\n', '.') for r in res]
except RuntimeError as e:
try:
@ -387,7 +404,6 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
return cut(txt, must_break_at_empty_line=False, break_anyway=True)
def read_and_clean_pdf_text(fp):
"""
这个函数用于分割pdf用了很多trick逻辑较乱效果奇好
@ -415,8 +431,9 @@ def read_and_clean_pdf_text(fp):
fc = 0 # Index 0 文本
fs = 1 # Index 1 字体
fb = 2 # Index 2 框框
REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
def primary_ffsize(l):
"""
提取文本块主字体
@ -426,12 +443,12 @@ def read_and_clean_pdf_text(fp):
if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
fsize_statiscs[wtf['size']] += len(wtf['text'])
return max(fsize_statiscs, key=fsize_statiscs.get)
def ffsize_same(a,b):
def ffsize_same(a, b):
"""
提取字体大小是否近似相等
"""
return abs((a-b)/max(a,b)) < 0.02
return abs((a - b) / max(a, b)) < 0.02
with fitz.open(fp) as doc:
meta_txt = []
@ -451,18 +468,19 @@ def read_and_clean_pdf_text(fp):
if len(txt_line) == 0: continue
pf = primary_ffsize(l)
meta_line.append([txt_line, pf, l['bbox'], l])
for wtf in l['spans']: # for l in t['lines']:
for wtf in l['spans']: # for l in t['lines']:
meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
# meta_line.append(["NEW_BLOCK", pf])
# 块元提取 for each word segment with in line for each line cross-line words for each block
# 块元提取 for each word segment with in line for each line
# cross-line words for each block
meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t])
meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
if index == 0:
page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
############################## <第 2 步,获取正文主字体> ##################################
fsize_statiscs = {}
for span in meta_span:
@ -476,32 +494,33 @@ def read_and_clean_pdf_text(fp):
mega_sec = []
sec = []
for index, line in enumerate(meta_line):
if index == 0:
if index == 0:
sec.append(line[fc])
continue
if REMOVE_FOOT_NOTE:
if meta_line[index][fs] <= give_up_fize_threshold:
continue
if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
if ffsize_same(meta_line[index][fs], meta_line[index - 1][fs]):
# 尝试识别段落
if meta_line[index][fc].endswith('.') and\
(meta_line[index-1][fc] != 'NEW_BLOCK') and \
(meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
if meta_line[index][fc].endswith('.') and \
(meta_line[index - 1][fc] != 'NEW_BLOCK') and \
(meta_line[index][fb][2] - meta_line[index][fb][0]) < (
meta_line[index - 1][fb][2] - meta_line[index - 1][fb][0]) * 0.7:
sec[-1] += line[fc]
sec[-1] += "\n\n"
else:
sec[-1] += " "
sec[-1] += line[fc]
else:
if (index+1 < len(meta_line)) and \
meta_line[index][fs] > main_fsize:
if (index + 1 < len(meta_line)) and \
meta_line[index][fs] > main_fsize:
# 单行 + 字体大
mega_sec.append(copy.deepcopy(sec))
sec = []
sec.append("# " + line[fc])
else:
# 尝试识别section
if meta_line[index-1][fs] > meta_line[index][fs]:
if meta_line[index - 1][fs] > meta_line[index][fs]:
sec.append("\n" + line[fc])
else:
sec.append(line[fc])
@ -520,13 +539,15 @@ def read_and_clean_pdf_text(fp):
if len(block_txt) < 100:
meta_txt[index] = '\n'
return meta_txt
meta_txt = 把字符太少的块清除为回车(meta_txt)
def 清理多余的空行(meta_txt):
for index in reversed(range(1, len(meta_txt))):
if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n':
meta_txt.pop(index)
return meta_txt
meta_txt = 清理多余的空行(meta_txt)
def 合并小写开头的段落块(meta_txt):
@ -537,16 +558,18 @@ def read_and_clean_pdf_text(fp):
return True
else:
return False
for _ in range(100):
for index, block_txt in enumerate(meta_txt):
if starts_with_lowercase_word(block_txt):
if meta_txt[index-1] != '\n':
meta_txt[index-1] += ' '
if meta_txt[index - 1] != '\n':
meta_txt[index - 1] += ' '
else:
meta_txt[index-1] = ''
meta_txt[index-1] += meta_txt[index]
meta_txt[index - 1] = ''
meta_txt[index - 1] += meta_txt[index]
meta_txt[index] = '\n'
return meta_txt
meta_txt = 合并小写开头的段落块(meta_txt)
meta_txt = 清理多余的空行(meta_txt)
@ -566,7 +589,7 @@ def read_and_clean_pdf_text(fp):
return meta_txt, page_one_meta
def get_files_from_everything(txt, type): # type='.md'
def get_files_from_everything(txt, type): # type='.md'
"""
这个函数是用来获取指定目录下所有指定类型(如.md的文件并且对于网络上的文件也可以获取它。
下面是对每个参数和返回值的说明:
@ -588,9 +611,10 @@ def get_files_from_everything(txt, type): # type='.md'
from toolbox import get_conf
proxies, = get_conf('proxies')
r = requests.get(txt, proxies=proxies)
with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
with open('./gpt_log/temp' + type, 'wb+') as f:
f.write(r.content)
project_folder = './gpt_log/'
file_manifest = ['./gpt_log/temp'+type]
file_manifest = ['./gpt_log/temp' + type]
elif txt.endswith(type):
# 直接给定文件
file_manifest = [txt]
@ -598,7 +622,7 @@ def get_files_from_everything(txt, type): # type='.md'
elif os.path.exists(txt):
# 本地路径,递归搜索
project_folder = txt
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*' + type, recursive=True)]
if len(file_manifest) == 0:
success = False
else:
@ -609,16 +633,14 @@ def get_files_from_everything(txt, type): # type='.md'
return success, file_manifest, project_folder
def Singleton(cls):
_instance = {}
def _singleton(*args, **kargs):
if cls not in _instance:
_instance[cls] = cls(*args, **kargs)
return _instance[cls]
return _singleton
@ -637,31 +659,30 @@ class knowledge_archive_interface():
from toolbox import ProxyNetworkActivate
print('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate(): # 临时地激活代理网络
with ProxyNetworkActivate(): # 临时地激活代理网络
self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
return self.text2vec_large_chinese
def feed_archive(self, file_manifest, id="default"):
self.threadLock.acquire()
# import uuid
self.current_id = id
from zh_langchain import construct_vector_store
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=file_manifest,
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=file_manifest,
sentence_size=100,
history=[],
one_conent="",
one_content_segmentation="",
text2vec = self.get_chinese_text2vec(),
text2vec=self.get_chinese_text2vec(),
)
self.threadLock.release()
def get_current_archive_id(self):
return self.current_id
def get_loaded_file(self):
return self.qa_handle.get_loaded_file()
@ -670,30 +691,31 @@ class knowledge_archive_interface():
if not self.current_id == id:
self.current_id = id
from zh_langchain import construct_vector_store
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=[],
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=[],
sentence_size=100,
history=[],
one_conent="",
one_content_segmentation="",
text2vec = self.get_chinese_text2vec(),
text2vec=self.get_chinese_text2vec(),
)
VECTOR_SEARCH_SCORE_THRESHOLD = 0
VECTOR_SEARCH_TOP_K = 4
CHUNK_SIZE = 512
resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
query = txt,
vs_path = self.kai_path,
query=txt,
vs_path=self.kai_path,
score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
vector_search_top_k=VECTOR_SEARCH_TOP_K,
vector_search_top_k=VECTOR_SEARCH_TOP_K,
chunk_conent=True,
chunk_size=CHUNK_SIZE,
text2vec = self.get_chinese_text2vec(),
text2vec=self.get_chinese_text2vec(),
)
self.threadLock.release()
return resp, prompt
def try_install_deps(deps):
for dep in deps:
import subprocess, sys

View File

@ -203,6 +203,7 @@ def merge_tex_files_(project_foler, main_file, mode):
c = fx.read()
else:
# e.g., \input{srcs/07_appendix}
assert os.path.exists(fp+'.tex'), f'即找不到{fp},也找不到{fp}.texTex源文件缺失'
with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
c = fx.read()
c = merge_tex_files_(project_foler, c, mode)

View File

@ -27,8 +27,10 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
}
response = requests.post(url, headers=headers, json=data, proxies=proxies)
print(response.content)
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
try:
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
except:
raise RuntimeError(response.content.decode())
# 文件保存到本地
r = requests.get(image_url, proxies=proxies)
file_path = 'gpt_log/image_gen/'

View File

@ -1,5 +1,5 @@
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui
from toolbox import update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text
@ -147,23 +147,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
print('writing html result failed:', trimmed_format_exc())
# 准备文件的下载
import shutil
for pdf_path in generated_conclusion_files:
# 重命名文件
rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}'
if os.path.exists(rename_file):
os.remove(rename_file)
shutil.copyfile(pdf_path, rename_file)
if os.path.exists(pdf_path):
os.remove(pdf_path)
rename_file = f'翻译-{os.path.basename(pdf_path)}'
promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot)
for html_path in generated_html_files:
# 重命名文件
rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}'
if os.path.exists(rename_file):
os.remove(rename_file)
shutil.copyfile(html_path, rename_file)
if os.path.exists(html_path):
os.remove(html_path)
rename_file = f'翻译-{os.path.basename(html_path)}'
promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot)
chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@ -13,11 +13,11 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((txt, "正在同时咨询gpt-3.5和gpt-4……"))
chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = 'gpt-3.5-turbo&gpt-4' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,

View File

@ -1,67 +1,78 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os
import time
import os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs,
chatbot, history=[],
sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
history.append(i_say_show_user);
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2)
all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot,
history=history,
sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
history.append(i_say)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
import glob, os
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, system_prompt, web_port, history=None):
# history = [] # 清空历史,以免输入溢出
if history is None:
history = [] # 清空历史,以免输入溢出
import glob
import os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if txt == "":
txt = '空空如也的输入栏'
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@ -104,7 +104,7 @@ def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
meta_paper_info_list = meta_paper_info_list[batchsize:]
chatbot.append(["状态?",
"已经全部完成您可以试试让AI写一个Related Works例如您可以继续输入Write an academic \"Related Works\" section about \"你搜索的研究领域\" for me."])
"已经全部完成您可以试试让AI写一个Related Works例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
msg = '正常'
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)

View File

@ -0,0 +1,28 @@
# encoding: utf-8
# @Time : 2023/4/19
# @Author : Spike
# @Descr :
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
if txt:
show_say = txt
prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
else:
prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
show_say = '分析上述回答,再列出用户可能提出的三个问题。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt,
inputs_show_user=show_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=history,
sys_prompt=system_prompt
)
chatbot[-1] = (show_say, gpt_say)
history.extend([show_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

Binary file not shown.

View File

@ -1,15 +1,13 @@
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
def main():
import subprocess, sys
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
import gradio as gr
if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "用 pip install -r requirements.txt 安装依赖"
if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖详情信息见requirements.txt"
from request_llm.bridge_all import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS')
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
# 如果WEB_PORT是-1, 则随机选取WEB端口
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
@ -56,7 +54,6 @@ def main():
cancel_handles = []
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
gr.HTML(title_html)
gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”Duplicate Space按钮<font color="#FF00FF">使用时先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”Duplicate Space之前填入API_KEY或进行提问否则您的API_KEY将极可能被空间所有者攫取<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr_L1():
with gr_L2(scale=2):
@ -66,7 +63,7 @@ def main():
with gr_L2(scale=1):
with gr.Accordion("输入区", open=True) as area_input_primary:
with gr.Row():
txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥输入多个密钥时用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
with gr.Row():
submitBtn = gr.Button("提交", variant="primary")
with gr.Row():
@ -107,7 +104,7 @@ def main():
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
@ -147,6 +144,11 @@ def main():
resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
clearBtn.click(lambda: ("",""), None, [txt, txt2])
clearBtn2.click(lambda: ("",""), None, [txt, txt2])
if AUTO_CLEAR_TXT:
submitBtn.click(lambda: ("",""), None, [txt, txt2])
submitBtn2.click(lambda: ("",""), None, [txt, txt2])
txt.submit(lambda: ("",""), None, [txt, txt2])
txt2.submit(lambda: ("",""), None, [txt, txt2])
# 基础功能区的回调函数注册
for k in functional:
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
@ -200,7 +202,10 @@ def main():
threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
auto_opentab_delay()
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
server_name="0.0.0.0", server_port=PORT,
favicon_path="docs/logo.png", auth=AUTHENTICATION,
blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
# 如果需要在二级路径下运行
# CUSTOM_PATH, = get_conf('CUSTOM_PATH')

View File

@ -28,6 +28,7 @@ proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def get_full_error(chunk, stream_response):
"""
获取完整的从Openai返回的报错
@ -40,7 +41,9 @@ def get_full_error(chunk, stream_response):
return chunk
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
def predict_no_ui_long_connection(
inputs, llm_kwargs, history=None, sys_prompt="", observe_window=None, console_slience=False
):
"""
发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
inputs
@ -54,45 +57,59 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
observe_window = None
用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
"""
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
if history is None:
history = []
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
retry = 0
from bridge_all import model_info
while True:
try:
# make a POST request to the API endpoint, stream=False
from .bridge_all import model_info
endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
response = requests.post(endpoint, headers=headers, proxies=proxies,
json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
except requests.exceptions.ReadTimeout as e:
json=payload, stream=True, timeout=TIMEOUT_SECONDS)
stream_response = response.iter_lines()
break
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
retry += 1
traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
if retry > MAX_RETRY:
raise TimeoutError
if MAX_RETRY != 0:
print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
except Exception as e:
print(f"出现异常:{e}")
raise e
stream_response = response.iter_lines()
result = ''
while True:
try: chunk = next(stream_response).decode()
try:
chunk = next(stream_response).decode()
except StopIteration:
break
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
# except requests.exceptions.ConnectionError:
# chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk) == 0:
continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else:
raise RuntimeError("OpenAI拒绝了请求" + error_msg)
if ('data: [DONE]' in chunk): break # api2d 正常完成
if 'data: [DONE]' in chunk:
break # api2d 正常完成
json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
delta = json_data["delta"]
if len(delta) == 0: break
if "role" in delta: continue
if len(delta) == 0:
break
if "role" in delta:
continue
if "content" in delta:
result += delta["content"]
if not console_slience: print(delta["content"], end='')
if not console_slience:
print(delta["content"], end='')
if observe_window is not None:
# 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: observe_window[0] += delta["content"]
@ -100,7 +117,8 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。")
else: raise RuntimeError("意外Json结构"+delta)
else:
raise RuntimeError("意外Json结构"+delta)
if json_data['finish_reason'] == 'length':
raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
return result
@ -228,6 +246,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
return
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
"""
整合所有信息选择LLM模型生成http请求为发送请求做准备
@ -247,23 +266,19 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
what_i_have_asked = {"role": "user", "content": history[index]}
what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "": continue
if what_gpt_answer["content"] == timeout_bot_msg: continue
if what_gpt_answer["content"] == "":
continue
if what_gpt_answer["content"] == timeout_bot_msg:
continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
what_i_ask_now = {"role": "user", "content": inputs}
messages.append(what_i_ask_now)
payload = {
@ -278,8 +293,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
}
try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except:
print('输入中可能存在乱码。')
return headers,payload
except Exception as e:
print(f'输入中可能存在乱码。抛出异常: {e}')
return headers, payload

View File

@ -1,3 +1,4 @@
gradio>=3.33.1
tiktoken>=0.3.3
requests[socks]
transformers
@ -15,3 +16,5 @@ openai
numpy
arxiv
rich
langchain
zh_langchain

View File

@ -21,6 +21,7 @@ pj = os.path.join
========================================================================
"""
class ChatBotWithCookies(list):
def __init__(self, cookie):
self._cookies = cookie
@ -71,11 +72,13 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面
assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时可用clear将其清空然后用for+append循环重新赋值。"
yield chatbot.get_cookies(), chatbot, history, msg
def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
"""
刷新用户界面
"""
if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
if len(chatbot) == 0:
chatbot.append(["update_ui_last_msg", lastmsg])
chatbot[-1] = list(chatbot[-1])
chatbot[-1][-1] = lastmsg
yield from update_ui(chatbot=chatbot, history=history)
@ -83,24 +86,25 @@ def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
def trimmed_format_exc():
import os, traceback
str = traceback.format_exc()
import os
import traceback
_str = traceback.format_exc()
current_path = os.getcwd()
replace_path = "."
return str.replace(current_path, replace_path)
return _str.replace(current_path, replace_path)
def CatchException(f):
"""
装饰器函数捕捉函数f中的异常并封装到一个生成器中返回并显示到聊天当中。
"""
@wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
try:
yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
except Exception as e:
from check_proxy import check_proxy
from toolbox import get_conf
# from toolbox import get_conf # 不需要导入本文件内容
proxies, = get_conf('proxies')
tb_str = '```\n' + trimmed_format_exc() + '```'
if len(chatbot) == 0:
@ -108,7 +112,7 @@ def CatchException(f):
chatbot.append(["插件调度异常", "异常原因"])
chatbot[-1] = (chatbot[-1][0],
f"[Local Message] 实验性函数调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
yield from update_ui(chatbot=chatbot, history=history, msg=f'异常 {e}') # 刷新界面
yield from update_ui(chatbot=chatbot, history=history, msg=f'异常 {e}') # 刷新界面
return decorated
@ -148,6 +152,7 @@ def HotReload(f):
========================================================================
"""
def get_reduce_token_percent(text):
"""
* 此函数未来将被弃用
@ -207,8 +212,6 @@ def regular_txt_to_markdown(text):
return text
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
@ -238,6 +241,7 @@ def text_divide_paragraph(text):
text = "</br>".join(lines)
return pre + text + suf
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):
"""
@ -440,6 +444,7 @@ def find_recent_files(directory):
return recent_files
def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区
import shutil
@ -452,6 +457,7 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
else: current = []
chatbot._cookies.update({'file_to_promote': [new_path] + current})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
"""
当文件被上传时的回调函数
@ -505,17 +511,20 @@ def on_report_generated(cookies, files, chatbot):
chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
return cookies, report_files, chatbot
def is_openai_api_key(key):
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key)
return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE)
def is_api2d_key(key):
if key.startswith('fk') and len(key) == 41:
return True
else:
return False
def is_any_api_key(key):
if ',' in key:
keys = key.split(',')
@ -525,6 +534,7 @@ def is_any_api_key(key):
else:
return is_openai_api_key(key) or is_api2d_key(key)
def what_keys(keys):
avail_key_list = {'OpenAI Key':0, "API2D Key":0}
key_list = keys.split(',')
@ -539,6 +549,7 @@ def what_keys(keys):
return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']}API2D Key {avail_key_list['API2D Key']}"
def select_api_key(keys, llm_model):
import random
avail_key_list = []
@ -558,6 +569,7 @@ def select_api_key(keys, llm_model):
api_key = random.choice(avail_key_list) # 随机负载均衡
return api_key
def read_env_variable(arg, default_value):
"""
环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG`
@ -612,6 +624,7 @@ def read_env_variable(arg, default_value):
print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
return r
@lru_cache(maxsize=128)
def read_single_conf_with_lru_cache(arg):
from colorful import print亮红, print亮绿, print亮蓝
@ -676,6 +689,7 @@ class DummyWith():
def __exit__(self, exc_type, exc_value, traceback):
return
def run_gradio_in_subpath(demo, auth, port, custom_path):
"""
把gradio的运行地址更改到指定的二次路径上
@ -770,6 +784,7 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
========================================================================
"""
def zip_folder(source_folder, dest_folder, zip_name):
import zipfile
import os
@ -801,6 +816,7 @@ def zip_folder(source_folder, dest_folder, zip_name):
print(f"Zip file created at {zip_file}")
def zip_result(folder):
import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
@ -811,6 +827,7 @@ def gen_time_str():
import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
class ProxyNetworkActivate():
"""
这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@ -830,16 +847,18 @@ class ProxyNetworkActivate():
if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
return
def objdump(obj, file='objdump.tmp'):
import pickle
with open(file, 'wb+') as f:
pickle.dump(obj, f)
return
def objload(file='objdump.tmp'):
import pickle, os
if not os.path.exists(file):
return
with open(file, 'rb') as f:
return pickle.load(f)