Compare commits

..

29 Commits

Author SHA1 Message Date
6df33c95d4 Merge pull request #841 from KelvinF97/master
Optimize some code and fix some bugs
2023-07-01 22:31:28 +08:00
59877dd728 Local variable 'result' might be referenced before assignment, add else result 2023-07-01 22:27:11 +08:00
a4f187b8dc Merge branch 'master' into master 2023-07-01 22:19:53 +08:00
5f7ffef238 增加基础功能判空 2023-07-01 22:04:42 +08:00
41c10f5688 report image generation error in UI 2023-07-01 02:28:32 +08:00
d7ac99f603 更正错误提示 2023-07-01 01:46:43 +08:00
1616daae6a Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master 2023-07-01 00:17:30 +08:00
a1092d8f92 提供自动清空输入框的选项 2023-07-01 00:17:26 +08:00
34ca9f138f Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-30 14:56:28 +08:00
df3f1aa3ca 更正ChatGLM2的默认Token数量 2023-06-30 14:56:22 +08:00
bf805cf477 Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master 2023-06-30 13:09:51 +08:00
ecb08e69be remove find picture core functionality 2023-06-30 13:08:54 +08:00
28c1e3f11b Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-06-30 12:06:33 +08:00
403667aec1 upgrade chatglm to chatglm2 2023-06-30 12:06:28 +08:00
2da36c7667 Optimizing the code, requests. exceptions. ConnectionError should be written in the post request instead of reading from the iterator. If the post request is unsuccessful, it will not be executed to the iterator step. 2023-06-06 16:10:54 +08:00
0e1de5a184 Optimize the code to make it more readable, catch other exceptions, and avoid response contentless reading exceptions 2023-06-06 10:57:52 +08:00
344579fa79 Every time the function is called, if the list parameter is not explicitly passed, the same default list will be used. This leads to the sharing of the same list object between function calls, resulting in a cumulative effect. 2023-06-06 10:31:28 +08:00
6d7ee17dbd Add zh_ Langchain into dependent files 2023-06-06 09:37:04 +08:00
0a83ba91e9 Add langchain & For safety reasons, try not to use compilation and installation packages from unknown sources or make changes/ Docs/graphic 3.32.2 py3 none any. whl is graphic>=3.33.1 2023-06-06 09:22:50 +08:00
ffd7363c4c need more detailed and comprehensive exception information, it is usually recommended to use the exception object e. Stack trace information can be used as a supplement to understand the context and call relationship of the exception occurrence 2023-06-06 09:13:13 +08:00
b538d31b13 Str is a built-in type and cannot be used directly as a variable name 2023-06-06 09:07:53 +08:00
543a8b98e9 Local variable 'result' might be referenced before assignment, add else result 2023-06-06 08:41:55 +08:00
55c6e9c59a Specify the proxy input type and use the get method to obtain dictionary data to avoid exceptions. Change the timeout to 30 seconds to avoid failures caused by network fluctuations. Obtain abnormal parameters and display them to the front-end for easy troubleshooting 2023-06-06 08:37:37 +08:00
0fc8f740d0 Fix PEP 8: E302 expected 2 blank lines, found 1 & PEP 8: E303 too many blank lines (4) 2023-06-06 08:29:26 +08:00
a019a64e65 PEP 8: E302 expected 2 blank lines, found 1 2023-06-06 08:26:54 +08:00
a75ae327e7 Make it comply with the PEP8 standard and improve PEP 8: E401 multiple imports on one line and PEP 8: E701 multiple statements on one line (colon) 2023-06-06 08:23:56 +08:00
3c38fad4aa PEP 8: E251 unexpected spaces around keyword / parameter equals 2023-06-06 08:14:21 +08:00
bf9731e937 Fix the issue of PEP 8: E401 multiple imports on one line 2023-06-06 08:07:50 +08:00
0f6e3e2dbb Fix the issue of ineffective transfer of reference history 2023-06-06 08:06:46 +08:00
23 changed files with 532 additions and 355 deletions

View File

@ -1,38 +0,0 @@
name: Build Image
on:
workflow_dispatch:
inputs:
release_tag:
description: 'Tag for the images'
required: true
env:
REGISTRY: registry.cn-hongkong.aliyuncs.com
NAMESPACE: chatwithpaper
IMAGE: academic
TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }}
jobs:
build:
runs-on: ubuntu-latest
environment: production
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Login to Registry
uses: docker/login-action@v2.1.0
with:
registry: "${{ env.REGISTRY }}"
username: "${{ secrets.ACR_USER }}"
password: "${{ secrets.ACR_PASSWORD }}"
- name: Build and push image
uses: docker/build-push-action@v4
with:
context: .
file: docs/Dockerfile+NoLocal+Latex
tags: ${{ env.REGISTRY }}/${{ env.NAMESPACE }}/${{ env.IMAGE }}:${{ env.TAG }}
push: true

View File

@ -0,0 +1,44 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_chatglm_moss
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+ChatGLM+Moss
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -0,0 +1,44 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_jittorllms
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+JittorLLMs
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -0,0 +1,44 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_nolocal
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+NoLocal
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,10 +1,10 @@
def check_proxy(proxies): def check_proxy(proxies: dict):
import requests import requests
proxies_https = proxies['https'] if proxies is not None else '' proxies_https = proxies.get('https') if proxies is not None else ''
try: try:
response = requests.get("https://ipapi.co/json/", response = requests.get("https://ipapi.co/json/",
proxies=proxies, timeout=4) proxies=proxies, timeout=30)
data = response.json() data = response.json()
print(f'查询代理的地理位置,返回的结果是{data}') print(f'查询代理的地理位置,返回的结果是{data}')
if 'country_name' in data: if 'country_name' in data:
@ -12,10 +12,12 @@ def check_proxy(proxies):
result = f"代理配置 {proxies_https}, 代理所在地:{country}" result = f"代理配置 {proxies_https}, 代理所在地:{country}"
elif 'error' in data: elif 'error' in data:
result = f"代理配置 {proxies_https}, 代理所在地未知IP查询频率受限" result = f"代理配置 {proxies_https}, 代理所在地未知IP查询频率受限"
else:
result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
print(result) print(result)
return result return result
except: except Exception as e:
result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效" result = f"代理 {proxies_https} 查询出现异常: {e},代理可能无效"
print(result) print(result)
return result return result

View File

@ -47,7 +47,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo"] AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
@ -56,6 +56,9 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
# 设置gradio的并行线程数不需要修改 # 设置gradio的并行线程数不需要修改
CONCURRENT_COUNT = 100 CONCURRENT_COUNT = 100
# 是否在提交时自动清空输入框
AUTO_CLEAR_TXT = False
# 加一个live2d装饰 # 加一个live2d装饰
ADD_WAIFU = False ADD_WAIFU = False

View File

@ -42,7 +42,6 @@ def get_core_functions():
"中译英": { "中译英": {
"Prefix": r"Please translate following sentence to English:" + "\n\n", "Prefix": r"Please translate following sentence to English:" + "\n\n",
"Suffix": r"", "Suffix": r"",
"Visible": False,
}, },
"学术中英互译": { "学术中英互译": {
"Prefix": r"I want you to act as a scientific English-Chinese translator, " + "Prefix": r"I want you to act as a scientific English-Chinese translator, " +
@ -75,6 +74,5 @@ def get_core_functions():
r"Note that, reference styles maybe more than one kind, you should transform each item correctly." + r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
r"Items need to be transformed:", r"Items need to be transformed:",
"Suffix": r"", "Suffix": r"",
"Visible": False,
} }
} }

View File

@ -26,7 +26,6 @@ def get_crazy_functions():
from crazy_functions.对话历史存档 import 删除所有本地对话历史记录 from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
from crazy_functions.批量Markdown翻译 import Markdown英译中 from crazy_functions.批量Markdown翻译 import Markdown英译中
function_plugins = { function_plugins = {
"解析整个Python项目": { "解析整个Python项目": {
"Color": "stop", # 按钮颜色 "Color": "stop", # 按钮颜色
@ -48,10 +47,10 @@ def get_crazy_functions():
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "若输入0则不解析notebook中的Markdown块", # 高级参数输入区的显示提示 "ArgsReminder": "若输入0则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
}, },
# "批量总结Word文档": { "批量总结Word文档": {
# "Color": "stop", "Color": "stop",
# "Function": HotReload(总结word文档) "Function": HotReload(总结word文档)
# }, },
"解析整个C++项目头文件": { "解析整个C++项目头文件": {
"Color": "stop", # 按钮颜色 "Color": "stop", # 按钮颜色
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
@ -109,10 +108,10 @@ def get_crazy_functions():
"保存当前的对话": { "保存当前的对话": {
"Function": HotReload(对话历史存档) "Function": HotReload(对话历史存档)
}, },
# "[多线程Demo] 解析此项目本身(源码自译解)": { "[多线程Demo] 解析此项目本身(源码自译解)": {
# "AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
# "Function": HotReload(解析项目本身) "Function": HotReload(解析项目本身)
# }, },
# "[老旧的Demo] 把本项目源代码切换成全英文": { # "[老旧的Demo] 把本项目源代码切换成全英文": {
# # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 # # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
# "AsButton": False, # 加入下拉菜单中 # "AsButton": False, # 加入下拉菜单中
@ -138,15 +137,15 @@ def get_crazy_functions():
from crazy_functions.批量Markdown翻译 import Markdown中译英 from crazy_functions.批量Markdown翻译 import Markdown中译英
function_plugins.update({ function_plugins.update({
"本地PDF全文翻译": { "批量翻译PDF文档多线程": {
"Color": "stop", "Color": "stop",
"AsButton": True, # 加入下拉菜单中 "AsButton": True, # 加入下拉菜单中
"Function": HotReload(批量翻译PDF文档) "Function": HotReload(批量翻译PDF文档)
}, },
# "询问多个GPT模型": { "询问多个GPT模型": {
# "Color": "stop", # 按钮颜色 "Color": "stop", # 按钮颜色
# "Function": HotReload(同时问询) "Function": HotReload(同时问询)
# }, },
"[测试功能] 批量总结PDF文档": { "[测试功能] 批量总结PDF文档": {
"Color": "stop", "Color": "stop",
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
@ -223,57 +222,54 @@ def get_crazy_functions():
}) })
except: except:
print('Load function plugin failed') print('Load function plugin failed')
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
try:
from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({ function_plugins.update({
"ArXiv Latex一键翻译输入区给定arXiv ID": { "连接网络回答问题(输入问题后点击该插件,需要访问谷歌": {
"Color": "stop", "Color": "stop",
"AsButton": True, "AsButton": False, # 加入下拉菜单中
"AdvancedArgs": True, "Function": HotReload(连接网络回答问题)
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
} }
}) })
# try: from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
# from crazy_functions.联网的ChatGPT import 连接网络回答问题 function_plugins.update({
# function_plugins.update({ "连接网络回答问题中文Bing版输入问题后点击该插件": {
# "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": { "Color": "stop",
# "Color": "stop", "AsButton": False, # 加入下拉菜单中
# "AsButton": False, # 加入下拉菜单中 "Function": HotReload(连接bing搜索回答问题)
# "Function": HotReload(连接网络回答问题) }
# } })
# }) except:
# except: print('Load function plugin failed')
# print('Load function plugin failed')
# try: try:
# from crazy_functions.解析项目源代码 import 解析任意code项目 from crazy_functions.解析项目源代码 import 解析任意code项目
# function_plugins.update({ function_plugins.update({
# "解析项目源代码(手动指定和筛选源代码文件类型)": { "解析项目源代码(手动指定和筛选源代码文件类型)": {
# "Color": "stop", "Color": "stop",
# "AsButton": False, "AsButton": False,
# "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
# "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示 "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
# "Function": HotReload(解析任意code项目) "Function": HotReload(解析任意code项目)
# }, },
# }) })
# except: except:
# print('Load function plugin failed') print('Load function plugin failed')
# try: try:
# from crazy_functions.询问多个大语言模型 import 同时问询_指定模型 from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
# function_plugins.update({ function_plugins.update({
# "询问多个GPT模型手动指定询问哪些模型": { "询问多个GPT模型手动指定询问哪些模型": {
# "Color": "stop", "Color": "stop",
# "AsButton": False, "AsButton": False,
# "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
# "ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示 "ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
# "Function": HotReload(同时问询_指定模型) "Function": HotReload(同时问询_指定模型)
# }, },
# }) })
# except: except:
# print('Load function plugin failed') print('Load function plugin failed')
try: try:
from crazy_functions.图片生成 import 图片生成 from crazy_functions.图片生成 import 图片生成
@ -368,18 +364,29 @@ def get_crazy_functions():
"Function": HotReload(Latex英文纠错加PDF对比) "Function": HotReload(Latex英文纠错加PDF对比)
} }
}) })
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
# function_plugins.update({ function_plugins.update({
# "本地论文翻译上传Latex压缩包 [需Latex]": { "Arixv翻译输入arxivID[需Latex]": {
# "Color": "stop", "Color": "stop",
# "AsButton": False, "AsButton": False,
# "AdvancedArgs": True, "AdvancedArgs": True,
# "ArgsReminder": "ArgsReminder":
# "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
# "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ', "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
# "Function": HotReload(Latex翻译中文并重新编译PDF) "Function": HotReload(Latex翻译中文并重新编译PDF)
# } }
# }) })
function_plugins.update({
"本地论文翻译上传Latex压缩包[需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
except: except:
print('Load function plugin failed') print('Load function plugin failed')
@ -397,18 +404,4 @@ def get_crazy_functions():
# except: # except:
# print('Load function plugin failed') # print('Load function plugin failed')
# try:
# from crazy_functions.虚空终端 import 终端
# function_plugins.update({
# "超级终端": {
# "Color": "stop",
# "AsButton": False,
# # "AdvancedArgs": True,
# # "ArgsReminder": "",
# "Function": HotReload(终端)
# }
# })
# except:
# print('Load function plugin failed')
return function_plugins return function_plugins

View File

@ -3,9 +3,7 @@ from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip
from functools import partial from functools import partial
import glob, os, requests, time import glob, os, requests, time
pj = os.path.join pj = os.path.join
# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
ARXIV_CACHE_DIR = os.getenv("Arxiv_Cache")
# =================================== 工具函数 =============================================== # =================================== 工具函数 ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
@ -192,9 +190,9 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req -------------> # <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread.tex'): if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt) chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)
# <-------------- compile PDF -------------> # <-------------- compile PDF ------------->

View File

@ -195,7 +195,7 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2303.08774" # txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712" # txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"C:\Users\fuqingxu\Desktop\9" txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

View File

@ -1,11 +1,14 @@
from toolbox import update_ui, get_conf, trimmed_format_exc from toolbox import update_ui, get_conf, trimmed_format_exc
import threading import threading
def input_clipping(inputs, history, max_token_limit): def input_clipping(inputs, history, max_token_limit):
import numpy as np import numpy as np
from request_llm.bridge_all import model_info from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer'] enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
def get_token_num(txt):
return len(enc.encode(txt, disallowed_special=()))
mode = 'input-and-history' mode = 'input-and-history'
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史 # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
@ -35,6 +38,7 @@ def input_clipping(inputs, history, max_token_limit):
history = everything[1:] history = everything[1:]
return inputs, history return inputs, history
def request_gpt_model_in_new_thread_with_ui_alive( def request_gpt_model_in_new_thread_with_ui_alive(
inputs, inputs_show_user, llm_kwargs, inputs, inputs_show_user, llm_kwargs,
chatbot, history, sys_prompt, refresh_interval=0.2, chatbot, history, sys_prompt, refresh_interval=0.2,
@ -67,6 +71,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
executor = ThreadPoolExecutor(max_workers=16) executor = ThreadPoolExecutor(max_workers=16)
mutable = ["", time.time(), ""] mutable = ["", time.time(), ""]
def _req_gpt(inputs, history, sys_prompt): def _req_gpt(inputs, history, sys_prompt):
retry_op = retry_times_at_unknown_error retry_op = retry_times_at_unknown_error
exceeded_cnt = 0 exceeded_cnt = 0
@ -104,7 +109,8 @@ def request_gpt_model_in_new_thread_with_ui_alive(
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n" mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if retry_op > 0: if retry_op > 0:
retry_op -= 1 retry_op -= 1
mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}\n\n" mutable[
0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}\n\n"
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str): if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
time.sleep(30) time.sleep(30)
time.sleep(5) time.sleep(5)
@ -171,8 +177,10 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
assert len(inputs_array) == len(history_array) assert len(inputs_array) == len(history_array)
assert len(inputs_array) == len(sys_prompt_array) assert len(inputs_array) == len(sys_prompt_array)
if max_workers == -1: # 读取配置文件 if max_workers == -1: # 读取配置文件
try: max_workers, = get_conf('DEFAULT_WORKER_NUM') try:
except: max_workers = 8 max_workers, = get_conf('DEFAULT_WORKER_NUM')
except:
max_workers = 8
if max_workers <= 0: max_workers = 3 if max_workers <= 0: max_workers = 3
# 屏蔽掉 chatglm的多线程可能会导致严重卡顿 # 屏蔽掉 chatglm的多线程可能会导致严重卡顿
if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')): if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
@ -222,15 +230,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
# 【选择放弃】 # 【选择放弃】
tb_str = '```\n' + trimmed_format_exc() + '```' tb_str = '```\n' + trimmed_format_exc() + '```'
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n" gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0] if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
mutable[index][2] = "输入过长已放弃" mutable[index][2] = "输入过长已放弃"
return gpt_say # 放弃 return gpt_say # 放弃
except: except Exception as e:
# 【第三种情况】:其他错误 # 【第三种情况】:其他错误
tb_str = '```\n' + trimmed_format_exc() + '```' tb_str = '```\n' + trimmed_format_exc() + '```'
print(tb_str) print(f"发生异常:{e}, 调用栈信息:{tb_str}")
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n" gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0] if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if retry_op > 0: if retry_op > 0:
retry_op -= 1 retry_op -= 1
wait = random.randint(5, 20) wait = random.randint(5, 20)
@ -241,9 +251,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
fail_info = "" fail_info = ""
# 也许等待十几秒后,情况会好转 # 也许等待十几秒后,情况会好转
for i in range(wait): for i in range(wait):
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1) mutable[index][2] = f"{fail_info}等待重试 {wait - i}";
time.sleep(1)
# 开始重试 # 开始重试
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}" mutable[index][
2] = f"重试中 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试 continue # 返回重试
else: else:
mutable[index][2] = "已失败" mutable[index][2] = "已失败"
@ -252,7 +264,8 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
return gpt_say # 放弃 return gpt_say # 放弃
# 异步任务开始 # 异步任务开始
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip( futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in
zip(
range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)] range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
cnt = 0 cnt = 0
while True: while True:
@ -306,6 +319,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
lines = txt_tocut.split('\n') lines = txt_tocut.split('\n')
estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines) estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
estimated_line_cut = int(estimated_line_cut) estimated_line_cut = int(estimated_line_cut)
cnt = 0
for cnt in reversed(range(estimated_line_cut)): for cnt in reversed(range(estimated_line_cut)):
if must_break_at_empty_line: if must_break_at_empty_line:
if lines[cnt] != "": if lines[cnt] != "":
@ -322,6 +336,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
result = [prev] result = [prev]
result.extend(cut(post, must_break_at_empty_line)) result.extend(cut(post, must_break_at_empty_line))
return result return result
try: try:
return cut(txt, must_break_at_empty_line=True) return cut(txt, must_break_at_empty_line=True)
except RuntimeError: except RuntimeError:
@ -337,6 +352,7 @@ def force_breakdown(txt, limit, get_token_fn):
return txt[:i], txt[i:] return txt[:i], txt[i:]
return "Tiktoken未知错误", "Tiktoken未知错误" return "Tiktoken未知错误", "Tiktoken未知错误"
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit): def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
# 递归 # 递归
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False): def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
@ -365,6 +381,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
result = [prev] result = [prev]
result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway)) result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
return result return result
try: try:
# 第1次尝试将双空行\n\n作为切分点 # 第1次尝试将双空行\n\n作为切分点
return cut(txt, must_break_at_empty_line=True) return cut(txt, must_break_at_empty_line=True)
@ -387,7 +404,6 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
return cut(txt, must_break_at_empty_line=False, break_anyway=True) return cut(txt, must_break_at_empty_line=False, break_anyway=True)
def read_and_clean_pdf_text(fp): def read_and_clean_pdf_text(fp):
""" """
这个函数用于分割pdf用了很多trick逻辑较乱效果奇好 这个函数用于分割pdf用了很多trick逻辑较乱效果奇好
@ -417,6 +433,7 @@ def read_and_clean_pdf_text(fp):
fb = 2 # Index 2 框框 fb = 2 # Index 2 框框
REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等) REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化) REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
def primary_ffsize(l): def primary_ffsize(l):
""" """
提取文本块主字体 提取文本块主字体
@ -454,7 +471,8 @@ def read_and_clean_pdf_text(fp):
for wtf in l['spans']: # for l in t['lines']: for wtf in l['spans']: # for l in t['lines']:
meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])]) meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
# meta_line.append(["NEW_BLOCK", pf]) # meta_line.append(["NEW_BLOCK", pf])
# 块元提取 for each word segment with in line for each line cross-line words for each block # 块元提取 for each word segment with in line for each line
# cross-line words for each block
meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace( meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t]) '- ', '') for t in text_areas['blocks'] if 'lines' in t])
meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']]) meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
@ -486,7 +504,8 @@ def read_and_clean_pdf_text(fp):
# 尝试识别段落 # 尝试识别段落
if meta_line[index][fc].endswith('.') and \ if meta_line[index][fc].endswith('.') and \
(meta_line[index - 1][fc] != 'NEW_BLOCK') and \ (meta_line[index - 1][fc] != 'NEW_BLOCK') and \
(meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7: (meta_line[index][fb][2] - meta_line[index][fb][0]) < (
meta_line[index - 1][fb][2] - meta_line[index - 1][fb][0]) * 0.7:
sec[-1] += line[fc] sec[-1] += line[fc]
sec[-1] += "\n\n" sec[-1] += "\n\n"
else: else:
@ -520,6 +539,7 @@ def read_and_clean_pdf_text(fp):
if len(block_txt) < 100: if len(block_txt) < 100:
meta_txt[index] = '\n' meta_txt[index] = '\n'
return meta_txt return meta_txt
meta_txt = 把字符太少的块清除为回车(meta_txt) meta_txt = 把字符太少的块清除为回车(meta_txt)
def 清理多余的空行(meta_txt): def 清理多余的空行(meta_txt):
@ -527,6 +547,7 @@ def read_and_clean_pdf_text(fp):
if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n': if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n':
meta_txt.pop(index) meta_txt.pop(index)
return meta_txt return meta_txt
meta_txt = 清理多余的空行(meta_txt) meta_txt = 清理多余的空行(meta_txt)
def 合并小写开头的段落块(meta_txt): def 合并小写开头的段落块(meta_txt):
@ -537,6 +558,7 @@ def read_and_clean_pdf_text(fp):
return True return True
else: else:
return False return False
for _ in range(100): for _ in range(100):
for index, block_txt in enumerate(meta_txt): for index, block_txt in enumerate(meta_txt):
if starts_with_lowercase_word(block_txt): if starts_with_lowercase_word(block_txt):
@ -547,6 +569,7 @@ def read_and_clean_pdf_text(fp):
meta_txt[index - 1] += meta_txt[index] meta_txt[index - 1] += meta_txt[index]
meta_txt[index] = '\n' meta_txt[index] = '\n'
return meta_txt return meta_txt
meta_txt = 合并小写开头的段落块(meta_txt) meta_txt = 合并小写开头的段落块(meta_txt)
meta_txt = 清理多余的空行(meta_txt) meta_txt = 清理多余的空行(meta_txt)
@ -588,7 +611,8 @@ def get_files_from_everything(txt, type): # type='.md'
from toolbox import get_conf from toolbox import get_conf
proxies, = get_conf('proxies') proxies, = get_conf('proxies')
r = requests.get(txt, proxies=proxies) r = requests.get(txt, proxies=proxies)
with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content) with open('./gpt_log/temp' + type, 'wb+') as f:
f.write(r.content)
project_folder = './gpt_log/' project_folder = './gpt_log/'
file_manifest = ['./gpt_log/temp' + type] file_manifest = ['./gpt_log/temp' + type]
elif txt.endswith(type): elif txt.endswith(type):
@ -609,8 +633,6 @@ def get_files_from_everything(txt, type): # type='.md'
return success, file_manifest, project_folder return success, file_manifest, project_folder
def Singleton(cls): def Singleton(cls):
_instance = {} _instance = {}
@ -642,7 +664,6 @@ class knowledge_archive_interface():
return self.text2vec_large_chinese return self.text2vec_large_chinese
def feed_archive(self, file_manifest, id="default"): def feed_archive(self, file_manifest, id="default"):
self.threadLock.acquire() self.threadLock.acquire()
# import uuid # import uuid
@ -694,6 +715,7 @@ class knowledge_archive_interface():
self.threadLock.release() self.threadLock.release()
return resp, prompt return resp, prompt
def try_install_deps(deps): def try_install_deps(deps):
for dep in deps: for dep in deps:
import subprocess, sys import subprocess, sys

View File

@ -203,6 +203,7 @@ def merge_tex_files_(project_foler, main_file, mode):
c = fx.read() c = fx.read()
else: else:
# e.g., \input{srcs/07_appendix} # e.g., \input{srcs/07_appendix}
assert os.path.exists(fp+'.tex'), f'即找不到{fp},也找不到{fp}.texTex源文件缺失'
with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx: with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
c = fx.read() c = fx.read()
c = merge_tex_files_(project_foler, c, mode) c = merge_tex_files_(project_foler, c, mode)
@ -449,10 +450,9 @@ class LatexPaperSplit():
""" """
def __init__(self) -> None: def __init__(self) -> None:
self.nodes = None self.nodes = None
self.msg = "{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \ self.msg = "*{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址: \\url{https://github.com/binary-husky/gpt_academic/}。" + \ "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
"项目在线体验地址: \\url{https://chatpaper.org}。"
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者 # 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"

View File

@ -27,8 +27,10 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
} }
response = requests.post(url, headers=headers, json=data, proxies=proxies) response = requests.post(url, headers=headers, json=data, proxies=proxies)
print(response.content) print(response.content)
try:
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url'] image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
except:
raise RuntimeError(response.content.decode())
# 文件保存到本地 # 文件保存到本地
r = requests.get(image_url, proxies=proxies) r = requests.get(image_url, proxies=proxies)
file_path = 'gpt_log/image_gen/' file_path = 'gpt_log/image_gen/'

View File

@ -1,11 +1,13 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False fast_debug = False
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt): def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time, glob, os import time
import os
print('begin analysis on:', file_manifest) print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest): for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f: with open(fp, 'r', encoding='utf-8', errors='replace') as f:
@ -20,10 +22,13 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
if not fast_debug: if not fast_debug:
msg = '正常' msg = '正常'
# ** gpt request ** # ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时 gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs,
chatbot, history=[],
sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say) chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user); history.append(gpt_say) history.append(i_say_show_user);
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2) if not fast_debug: time.sleep(2)
@ -35,25 +40,31 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
if not fast_debug: if not fast_debug:
msg = '正常' msg = '正常'
# ** gpt request ** # ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时 gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot,
history=history,
sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say) chatbot[-1] = (i_say, gpt_say)
history.append(i_say); history.append(gpt_say) history.append(i_say)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history) res = write_results_to_file(history)
chatbot.append(("完成了吗?", res)) chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException @CatchException
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, system_prompt, web_port, history=None):
# history = [] # 清空历史,以免输入溢出
if history is None:
history = [] # 清空历史,以免输入溢出 history = [] # 清空历史,以免输入溢出
import glob, os import glob
import os
if os.path.exists(txt): if os.path.exists(txt):
project_folder = txt project_folder = txt
else: else:
if txt == "": txt = '空空如也的输入栏' if txt == "":
txt = '空空如也的输入栏'
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}") report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return return

View File

@ -0,0 +1,28 @@
# encoding: utf-8
# @Time : 2023/4/19
# @Author : Spike
# @Descr :
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
if txt:
show_say = txt
prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
else:
prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
show_say = '分析上述回答,再列出用户可能提出的三个问题。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=prompt,
inputs_show_user=show_say,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history=history,
sys_prompt=system_prompt
)
chatbot[-1] = (show_say, gpt_say)
history.extend([show_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@ -96,6 +96,15 @@
● 部署名(不是模型名) ● 部署名(不是模型名)
# 修改 config.py
```
AZURE_ENDPOINT = "填入终结点"
AZURE_API_KEY = "填入azure openai api的密钥"
AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改
AZURE_ENGINE = "填入部署名"
```
# API的使用 # API的使用
接下来就是具体怎么使用API了还是可以参考官方文档[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) 接下来就是具体怎么使用API了还是可以参考官方文档[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)

38
main.py
View File

@ -1,5 +1,4 @@
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
from pathlib import Path
def main(): def main():
import gradio as gr import gradio as gr
@ -7,8 +6,8 @@ def main():
from request_llm.bridge_all import predict from request_llm.bridge_all import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到 # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \ proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS') get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
# 如果WEB_PORT是-1, 则随机选取WEB端口 # 如果WEB_PORT是-1, 则随机选取WEB端口
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
@ -16,7 +15,7 @@ def main():
from check_proxy import get_current_version from check_proxy import get_current_version
initial_prompt = "Serve me as a writing and programming assistant." initial_prompt = "Serve me as a writing and programming assistant."
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 网页测试版 {get_current_version()}</h1>" title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)""" description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
# 问询记录, python 版本建议3.9+(越新越好) # 问询记录, python 版本建议3.9+(越新越好)
@ -53,21 +52,7 @@ def main():
CHATBOT_HEIGHT /= 2 CHATBOT_HEIGHT /= 2
cancel_handles = [] cancel_handles = []
# Read your Baidu statistics code from the file
baidu_stats_code = Path('./sites/baidu_stats.html').read_text()
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo: with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
# Insert your Baidu statistics code here
gradio_original_template_fn = gr.routes.templates.TemplateResponse
def gradio_new_template_fn(*args, **kwargs):
res = gradio_original_template_fn(*args, **kwargs)
res.body = res.body.replace(b'</html>', f'{baidu_stats_code}</html>'.encode("utf8"))
res.init_headers()
return res
gr.routes.templates.TemplateResponse = gradio_new_template_fn # override gradio template
# Insert Title
gr.HTML(title_html) gr.HTML(title_html)
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL}) cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr_L1(): with gr_L1():
@ -86,13 +71,7 @@ def main():
stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm") stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm") clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
with gr.Row(): with gr.Row():
status = gr.Markdown(f"""Tips: 1. 按Enter提交, 按Shift+Enter换行2. 当前模型: {LLM_MODEL} \n {proxy_info}. status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行当前模型: {LLM_MODEL} \n {proxy_info}")
3. 请注意隐私保护和遵守法律法规;
4. 请勿使用本服务进行违法犯罪活动;
5. 我和qingxu都希望能够为大家提供一个好的**学术工具**,希望大家不要攻击和滥用本服务;
6. 本服务还存在各种bug如果发现bug欢迎加群反馈或者发issue告诉我们
7. 希望大家能结合ChatPaper的速读找到需要精读的再用本工具的全文翻译实现快速知识摄取。
""")
with gr.Accordion("基础功能区", open=True) as area_basic_fn: with gr.Accordion("基础功能区", open=True) as area_basic_fn:
with gr.Row(): with gr.Row():
for k in functional: for k in functional:
@ -125,7 +104,7 @@ def main():
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt) system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",) top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",) temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",) max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区") checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False) md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
@ -165,6 +144,11 @@ def main():
resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status]) resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
clearBtn.click(lambda: ("",""), None, [txt, txt2]) clearBtn.click(lambda: ("",""), None, [txt, txt2])
clearBtn2.click(lambda: ("",""), None, [txt, txt2]) clearBtn2.click(lambda: ("",""), None, [txt, txt2])
if AUTO_CLEAR_TXT:
submitBtn.click(lambda: ("",""), None, [txt, txt2])
submitBtn2.click(lambda: ("",""), None, [txt, txt2])
txt.submit(lambda: ("",""), None, [txt, txt2])
txt2.submit(lambda: ("",""), None, [txt, txt2])
# 基础功能区的回调函数注册 # 基础功能区的回调函数注册
for k in functional: for k in functional:
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
@ -188,7 +172,6 @@ def main():
ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")}) ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
return ret return ret
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] ) dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
def on_md_dropdown_changed(k): def on_md_dropdown_changed(k):
return {chatbot: gr.update(label="当前模型:"+k)} return {chatbot: gr.update(label="当前模型:"+k)}
md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] ) md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
@ -202,7 +185,6 @@ def main():
# 终止按钮的回调函数注册 # 终止按钮的回调函数注册
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
demo.load(on_dropdown_changed, inputs=gr.State("ArXiv Latex一键翻译输入区给定arXiv ID"), outputs=[switchy_bt, plugin_advanced_arg])
# gradio的inbrowser触发不太稳定回滚代码到原始的浏览器打开函数 # gradio的inbrowser触发不太稳定回滚代码到原始的浏览器打开函数
def auto_opentab_delay(): def auto_opentab_delay():

View File

@ -152,7 +152,7 @@ model_info = {
"token_cnt": get_token_num_gpt4, "token_cnt": get_token_num_gpt4,
}, },
# chatglm # chatglm 直接对齐到 chatglm2
"chatglm": { "chatglm": {
"fn_with_ui": chatglm_ui, "fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui, "fn_without_ui": chatglm_noui,
@ -161,6 +161,15 @@ model_info = {
"tokenizer": tokenizer_gpt35, "tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35, "token_cnt": get_token_num_gpt35,
}, },
"chatglm2": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
"endpoint": None,
"max_token": 1024,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
# newbing # newbing
"newbing": { "newbing": {
"fn_with_ui": newbing_ui, "fn_with_ui": newbing_ui,

View File

@ -40,12 +40,12 @@ class GetGLMHandle(Process):
while True: while True:
try: try:
if self.chatglm_model is None: if self.chatglm_model is None:
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
device, = get_conf('LOCAL_MODEL_DEVICE') device, = get_conf('LOCAL_MODEL_DEVICE')
if device=='cpu': if device=='cpu':
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float() self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
else: else:
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda() self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = self.chatglm_model.eval() self.chatglm_model = self.chatglm_model.eval()
break break
else: else:

View File

@ -28,6 +28,7 @@ proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \ timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。' '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def get_full_error(chunk, stream_response): def get_full_error(chunk, stream_response):
""" """
获取完整的从Openai返回的报错 获取完整的从Openai返回的报错
@ -40,7 +41,9 @@ def get_full_error(chunk, stream_response):
return chunk return chunk
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False): def predict_no_ui_long_connection(
inputs, llm_kwargs, history=None, sys_prompt="", observe_window=None, console_slience=False
):
""" """
发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。 发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
inputs inputs
@ -54,45 +57,59 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
observe_window = None observe_window = None
用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗 用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
""" """
if history is None:
history = []
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可 watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True) headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
retry = 0 retry = 0
from bridge_all import model_info
while True: while True:
try: try:
# make a POST request to the API endpoint, stream=False # make a POST request to the API endpoint, stream=False
from .bridge_all import model_info
endpoint = model_info[llm_kwargs['llm_model']]['endpoint'] endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
response = requests.post(endpoint, headers=headers, proxies=proxies, response = requests.post(endpoint, headers=headers, proxies=proxies,
json=payload, stream=True, timeout=TIMEOUT_SECONDS); break json=payload, stream=True, timeout=TIMEOUT_SECONDS)
except requests.exceptions.ReadTimeout as e: stream_response = response.iter_lines()
break
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
retry += 1 retry += 1
traceback.print_exc() traceback.print_exc()
if retry > MAX_RETRY: raise TimeoutError if retry > MAX_RETRY:
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……') raise TimeoutError
if MAX_RETRY != 0:
print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
except Exception as e:
print(f"出现异常:{e}")
raise e
stream_response = response.iter_lines()
result = '' result = ''
while True: while True:
try: chunk = next(stream_response).decode() try:
chunk = next(stream_response).decode()
except StopIteration: except StopIteration:
break break
except requests.exceptions.ConnectionError: # except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。 # chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue if len(chunk) == 0:
continue
if not chunk.startswith('data:'): if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode() error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
if "reduce the length" in error_msg: if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg) raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else: else:
raise RuntimeError("OpenAI拒绝了请求" + error_msg) raise RuntimeError("OpenAI拒绝了请求" + error_msg)
if ('data: [DONE]' in chunk): break # api2d 正常完成 if 'data: [DONE]' in chunk:
break # api2d 正常完成
json_data = json.loads(chunk.lstrip('data:'))['choices'][0] json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
delta = json_data["delta"] delta = json_data["delta"]
if len(delta) == 0: break if len(delta) == 0:
if "role" in delta: continue break
if "role" in delta:
continue
if "content" in delta: if "content" in delta:
result += delta["content"] result += delta["content"]
if not console_slience: print(delta["content"], end='') if not console_slience:
print(delta["content"], end='')
if observe_window is not None: if observe_window is not None:
# 观测窗,把已经获取的数据显示出去 # 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: observe_window[0] += delta["content"] if len(observe_window) >= 1: observe_window[0] += delta["content"]
@ -100,7 +117,8 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
if len(observe_window) >= 2: if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience: if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。") raise RuntimeError("用户取消了程序。")
else: raise RuntimeError("意外Json结构"+delta) else:
raise RuntimeError("意外Json结构"+delta)
if json_data['finish_reason'] == 'length': if json_data['finish_reason'] == 'length':
raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。") raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
return result return result
@ -228,6 +246,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
return return
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream): def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
""" """
整合所有信息选择LLM模型生成http请求为发送请求做准备 整合所有信息选择LLM模型生成http请求为发送请求做准备
@ -247,23 +266,19 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
messages = [{"role": "system", "content": system_prompt}] messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt: if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2): for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {} what_i_have_asked = {"role": "user", "content": history[index]}
what_i_have_asked["role"] = "user" what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
if what_i_have_asked["content"] != "": if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "": continue if what_gpt_answer["content"] == "":
if what_gpt_answer["content"] == timeout_bot_msg: continue continue
if what_gpt_answer["content"] == timeout_bot_msg:
continue
messages.append(what_i_have_asked) messages.append(what_i_have_asked)
messages.append(what_gpt_answer) messages.append(what_gpt_answer)
else: else:
messages[-1]['content'] = what_gpt_answer['content'] messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {} what_i_ask_now = {"role": "user", "content": inputs}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now) messages.append(what_i_ask_now)
payload = { payload = {
@ -278,8 +293,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
} }
try: try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........") print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except: except Exception as e:
print('输入中可能存在乱码。') print(f'输入中可能存在乱码。抛出异常: {e}')
return headers, payload return headers, payload

View File

@ -1,4 +1,4 @@
./docs/gradio-3.32.2-py3-none-any.whl gradio>=3.33.1
tiktoken>=0.3.3 tiktoken>=0.3.3
requests[socks] requests[socks]
transformers transformers
@ -16,3 +16,5 @@ openai
numpy numpy
arxiv arxiv
rich rich
langchain
zh_langchain

View File

@ -1,10 +0,0 @@
<!-- baidu_stats.html -->
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "https://hm.baidu.com/hm.js?208673d55832a94b9bbe10b1f4e70c09";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>

View File

@ -21,6 +21,7 @@ pj = os.path.join
======================================================================== ========================================================================
""" """
class ChatBotWithCookies(list): class ChatBotWithCookies(list):
def __init__(self, cookie): def __init__(self, cookie):
self._cookies = cookie self._cookies = cookie
@ -71,11 +72,13 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面
assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时可用clear将其清空然后用for+append循环重新赋值。" assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时可用clear将其清空然后用for+append循环重新赋值。"
yield chatbot.get_cookies(), chatbot, history, msg yield chatbot.get_cookies(), chatbot, history, msg
def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面 def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
""" """
刷新用户界面 刷新用户界面
""" """
if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg]) if len(chatbot) == 0:
chatbot.append(["update_ui_last_msg", lastmsg])
chatbot[-1] = list(chatbot[-1]) chatbot[-1] = list(chatbot[-1])
chatbot[-1][-1] = lastmsg chatbot[-1][-1] = lastmsg
yield from update_ui(chatbot=chatbot, history=history) yield from update_ui(chatbot=chatbot, history=history)
@ -83,24 +86,25 @@ def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
def trimmed_format_exc(): def trimmed_format_exc():
import os, traceback import os
str = traceback.format_exc() import traceback
_str = traceback.format_exc()
current_path = os.getcwd() current_path = os.getcwd()
replace_path = "." replace_path = "."
return str.replace(current_path, replace_path) return _str.replace(current_path, replace_path)
def CatchException(f): def CatchException(f):
""" """
装饰器函数捕捉函数f中的异常并封装到一个生成器中返回并显示到聊天当中。 装饰器函数捕捉函数f中的异常并封装到一个生成器中返回并显示到聊天当中。
""" """
@wraps(f) @wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1): def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
try: try:
yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT) yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
except Exception as e: except Exception as e:
from check_proxy import check_proxy from check_proxy import check_proxy
from toolbox import get_conf # from toolbox import get_conf # 不需要导入本文件内容
proxies, = get_conf('proxies') proxies, = get_conf('proxies')
tb_str = '```\n' + trimmed_format_exc() + '```' tb_str = '```\n' + trimmed_format_exc() + '```'
if len(chatbot) == 0: if len(chatbot) == 0:
@ -148,6 +152,7 @@ def HotReload(f):
======================================================================== ========================================================================
""" """
def get_reduce_token_percent(text): def get_reduce_token_percent(text):
""" """
* 此函数未来将被弃用 * 此函数未来将被弃用
@ -207,8 +212,6 @@ def regular_txt_to_markdown(text):
return text return text
def report_execption(chatbot, history, a, b): def report_execption(chatbot, history, a, b):
""" """
向chatbot中添加错误信息 向chatbot中添加错误信息
@ -238,6 +241,7 @@ def text_divide_paragraph(text):
text = "</br>".join(lines) text = "</br>".join(lines)
return pre + text + suf return pre + text + suf
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt): def markdown_convertion(txt):
""" """
@ -440,6 +444,7 @@ def find_recent_files(directory):
return recent_files return recent_files
def promote_file_to_downloadzone(file, rename_file=None, chatbot=None): def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区 # 将文件复制一份到下载区
import shutil import shutil
@ -452,6 +457,7 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
else: current = [] else: current = []
chatbot._cookies.update({'file_to_promote': [new_path] + current}) chatbot._cookies.update({'file_to_promote': [new_path] + current})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes): def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
""" """
当文件被上传时的回调函数 当文件被上传时的回调函数
@ -505,17 +511,20 @@ def on_report_generated(cookies, files, chatbot):
chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}']) chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
return cookies, report_files, chatbot return cookies, report_files, chatbot
def is_openai_api_key(key): def is_openai_api_key(key):
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key) API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key) API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key)
return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE) return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE)
def is_api2d_key(key): def is_api2d_key(key):
if key.startswith('fk') and len(key) == 41: if key.startswith('fk') and len(key) == 41:
return True return True
else: else:
return False return False
def is_any_api_key(key): def is_any_api_key(key):
if ',' in key: if ',' in key:
keys = key.split(',') keys = key.split(',')
@ -525,6 +534,7 @@ def is_any_api_key(key):
else: else:
return is_openai_api_key(key) or is_api2d_key(key) return is_openai_api_key(key) or is_api2d_key(key)
def what_keys(keys): def what_keys(keys):
avail_key_list = {'OpenAI Key':0, "API2D Key":0} avail_key_list = {'OpenAI Key':0, "API2D Key":0}
key_list = keys.split(',') key_list = keys.split(',')
@ -539,6 +549,7 @@ def what_keys(keys):
return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']}API2D Key {avail_key_list['API2D Key']}" return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']}API2D Key {avail_key_list['API2D Key']}"
def select_api_key(keys, llm_model): def select_api_key(keys, llm_model):
import random import random
avail_key_list = [] avail_key_list = []
@ -558,6 +569,7 @@ def select_api_key(keys, llm_model):
api_key = random.choice(avail_key_list) # 随机负载均衡 api_key = random.choice(avail_key_list) # 随机负载均衡
return api_key return api_key
def read_env_variable(arg, default_value): def read_env_variable(arg, default_value):
""" """
环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG` 环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG`
@ -612,6 +624,7 @@ def read_env_variable(arg, default_value):
print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}") print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
return r return r
@lru_cache(maxsize=128) @lru_cache(maxsize=128)
def read_single_conf_with_lru_cache(arg): def read_single_conf_with_lru_cache(arg):
from colorful import print亮红, print亮绿, print亮蓝 from colorful import print亮红, print亮绿, print亮蓝
@ -676,6 +689,7 @@ class DummyWith():
def __exit__(self, exc_type, exc_value, traceback): def __exit__(self, exc_type, exc_value, traceback):
return return
def run_gradio_in_subpath(demo, auth, port, custom_path): def run_gradio_in_subpath(demo, auth, port, custom_path):
""" """
把gradio的运行地址更改到指定的二次路径上 把gradio的运行地址更改到指定的二次路径上
@ -770,6 +784,7 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
======================================================================== ========================================================================
""" """
def zip_folder(source_folder, dest_folder, zip_name): def zip_folder(source_folder, dest_folder, zip_name):
import zipfile import zipfile
import os import os
@ -801,6 +816,7 @@ def zip_folder(source_folder, dest_folder, zip_name):
print(f"Zip file created at {zip_file}") print(f"Zip file created at {zip_file}")
def zip_result(folder): def zip_result(folder):
import time import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
@ -811,6 +827,7 @@ def gen_time_str():
import time import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
class ProxyNetworkActivate(): class ProxyNetworkActivate():
""" """
这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理 这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@ -830,12 +847,14 @@ class ProxyNetworkActivate():
if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY') if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
return return
def objdump(obj, file='objdump.tmp'):
    """Serialize *obj* to *file* using pickle.

    The target file is opened in 'wb+' mode, so any existing content is
    overwritten. Returns None.
    """
    import pickle
    handle = open(file, 'wb+')
    try:
        pickle.dump(obj, handle)
    finally:
        handle.close()
    return
def objload(file='objdump.tmp'): def objload(file='objdump.tmp'):
import pickle, os import pickle, os
if not os.path.exists(file): if not os.path.exists(file):