Compare commits

..

23 Commits

Author SHA1 Message Date
066b5d4d29 update pdf translate fn 2023-06-30 13:04:33 +08:00
2373348c9e 修改布局,删除本地LLM接口 2023-06-30 12:51:25 +08:00
3f8a145c1d Merge branch 'master' of https://github.com/kaixindelele/gpt_academic into chatpaper-master 2023-06-30 12:01:08 +08:00
146fde30b8 Update latex_utils.py 2023-06-27 17:31:09 +08:00
e79dcb1b48 fix bugs of baidu stats 2023-06-26 20:27:31 -07:00
0aadeabccc add baidu stats 2023-06-27 10:05:36 +08:00
4bc073b072 readme 2023-06-23 10:53:29 +08:00
756bd29f0c Update main.py 2023-06-21 18:49:57 +08:00
66c9e9a3cf arxiv_cache 2023-06-21 18:04:37 +08:00
ca49af1e53 functional 2023-06-21 18:02:08 +08:00
78df094eb9 update 2023-06-21 17:58:13 +08:00
b24e664a85 Update latex_utils.py 2023-06-20 21:06:34 +08:00
af3a1901a0 Update Latex输出PDF结果.py 2023-06-20 21:06:13 +08:00
8affcd92a9 Update crazy_functional.py 2023-06-20 21:04:49 +08:00
d83e0a7704 Update main.py 2023-06-20 21:04:32 +08:00
78c53b6bec Update crazy_functional.py 2023-06-19 20:58:38 +08:00
84e09766cd Rename build-image to build-image.yaml 2023-06-19 17:16:26 +08:00
a84f4f43bf delete unused action 2023-06-19 17:15:32 +08:00
cb7f6984a2 Create build-image 2023-06-19 17:07:09 +08:00
5703beb06b Merge pull request #1 from kaixindelele/patch-1
Patch 1
2023-06-19 15:18:42 +08:00
fcb0f466b9 Update crazy_functional.py 2023-06-19 15:11:32 +08:00
1b31d2e0d5 Merge branch 'binary-husky:master' into master 2023-06-19 14:26:41 +08:00
baa26e67ef arxiv_cache 2023-06-18 13:37:32 +08:00
23 changed files with 355 additions and 532 deletions

38
.github/workflows/build-image.yaml vendored Normal file
View File

@ -0,0 +1,38 @@
name: Build Image
on:
workflow_dispatch:
inputs:
release_tag:
description: 'Tag for the images'
required: true
env:
REGISTRY: registry.cn-hongkong.aliyuncs.com
NAMESPACE: chatwithpaper
IMAGE: academic
TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }}
jobs:
build:
runs-on: ubuntu-latest
environment: production
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Login to Registry
uses: docker/login-action@v2.1.0
with:
registry: "${{ env.REGISTRY }}"
username: "${{ secrets.ACR_USER }}"
password: "${{ secrets.ACR_PASSWORD }}"
- name: Build and push image
uses: docker/build-push-action@v4
with:
context: .
file: docs/Dockerfile+NoLocal+Latex
tags: ${{ env.REGISTRY }}/${{ env.NAMESPACE }}/${{ env.IMAGE }}:${{ env.TAG }}
push: true

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_chatglm_moss
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+ChatGLM+Moss
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_jittorllms
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+JittorLLMs
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_nolocal
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+NoLocal
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,10 +1,10 @@
def check_proxy(proxies: dict):
def check_proxy(proxies):
import requests
proxies_https = proxies.get('https') if proxies is not None else ''
proxies_https = proxies['https'] if proxies is not None else ''
try:
response = requests.get("https://ipapi.co/json/",
proxies=proxies, timeout=30)
proxies=proxies, timeout=4)
data = response.json()
print(f'查询代理的地理位置,返回的结果是{data}')
if 'country_name' in data:
@ -12,12 +12,10 @@ def check_proxy(proxies: dict):
result = f"代理配置 {proxies_https}, 代理所在地:{country}"
elif 'error' in data:
result = f"代理配置 {proxies_https}, 代理所在地未知IP查询频率受限"
else:
result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
print(result)
return result
except Exception as e:
result = f"代理 {proxies_https} 查询出现异常: {e},代理可能无效"
except:
result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
print(result)
return result

View File

@ -47,7 +47,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
@ -56,9 +56,6 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
# 设置gradio的并行线程数不需要修改
CONCURRENT_COUNT = 100
# 是否在提交时自动清空输入框
AUTO_CLEAR_TXT = False
# 加一个live2d装饰
ADD_WAIFU = False

View File

@ -42,6 +42,7 @@ def get_core_functions():
"中译英": {
"Prefix": r"Please translate following sentence to English:" + "\n\n",
"Suffix": r"",
"Visible": False,
},
"学术中英互译": {
"Prefix": r"I want you to act as a scientific English-Chinese translator, " +
@ -74,5 +75,6 @@ def get_core_functions():
r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
r"Items need to be transformed:",
"Suffix": r"",
"Visible": False,
}
}

View File

@ -26,6 +26,7 @@ def get_crazy_functions():
from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
from crazy_functions.批量Markdown翻译 import Markdown英译中
function_plugins = {
"解析整个Python项目": {
"Color": "stop", # 按钮颜色
@ -47,10 +48,10 @@ def get_crazy_functions():
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "若输入0则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
},
"批量总结Word文档": {
"Color": "stop",
"Function": HotReload(总结word文档)
},
# "批量总结Word文档": {
# "Color": "stop",
# "Function": HotReload(总结word文档)
# },
"解析整个C++项目头文件": {
"Color": "stop", # 按钮颜色
"AsButton": False, # 加入下拉菜单中
@ -108,10 +109,10 @@ def get_crazy_functions():
"保存当前的对话": {
"Function": HotReload(对话历史存档)
},
"[多线程Demo] 解析此项目本身(源码自译解)": {
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(解析项目本身)
},
# "[多线程Demo] 解析此项目本身(源码自译解)": {
# "AsButton": False, # 加入下拉菜单中
# "Function": HotReload(解析项目本身)
# },
# "[老旧的Demo] 把本项目源代码切换成全英文": {
# # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
# "AsButton": False, # 加入下拉菜单中
@ -137,15 +138,15 @@ def get_crazy_functions():
from crazy_functions.批量Markdown翻译 import Markdown中译英
function_plugins.update({
"批量翻译PDF文档多线程": {
"本地PDF全文翻译": {
"Color": "stop",
"AsButton": True, # 加入下拉菜单中
"Function": HotReload(批量翻译PDF文档)
},
"询问多个GPT模型": {
"Color": "stop", # 按钮颜色
"Function": HotReload(同时问询)
},
# "询问多个GPT模型": {
# "Color": "stop", # 按钮颜色
# "Function": HotReload(同时问询)
# },
"[测试功能] 批量总结PDF文档": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
@ -222,54 +223,57 @@ def get_crazy_functions():
})
except:
print('Load function plugin failed')
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
"ArXiv Latex一键翻译输入区给定arXiv ID": {
"Color": "stop",
"AsButton": True,
"AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
# try:
# from crazy_functions.联网的ChatGPT import 连接网络回答问题
# function_plugins.update({
# "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": {
# "Color": "stop",
# "AsButton": False, # 加入下拉菜单中
# "Function": HotReload(连接网络回答问题)
# }
# })
# except:
# print('Load function plugin failed')
try:
from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({
"连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接网络回答问题)
}
})
from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
function_plugins.update({
"连接网络回答问题中文Bing版输入问题后点击该插件": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接bing搜索回答问题)
}
})
except:
print('Load function plugin failed')
# try:
# from crazy_functions.解析项目源代码 import 解析任意code项目
# function_plugins.update({
# "解析项目源代码(手动指定和筛选源代码文件类型)": {
# "Color": "stop",
# "AsButton": False,
# "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
# "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
# "Function": HotReload(解析任意code项目)
# },
# })
# except:
# print('Load function plugin failed')
try:
from crazy_functions.解析项目源代码 import 解析任意code项目
function_plugins.update({
"解析项目源代码(手动指定和筛选源代码文件类型)": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
"Function": HotReload(解析任意code项目)
},
})
except:
print('Load function plugin failed')
try:
from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
function_plugins.update({
"询问多个GPT模型手动指定询问哪些模型": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
"Function": HotReload(同时问询_指定模型)
},
})
except:
print('Load function plugin failed')
# try:
# from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
# function_plugins.update({
# "询问多个GPT模型手动指定询问哪些模型": {
# "Color": "stop",
# "AsButton": False,
# "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
# "ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
# "Function": HotReload(同时问询_指定模型)
# },
# })
# except:
# print('Load function plugin failed')
try:
from crazy_functions.图片生成 import 图片生成
@ -364,29 +368,18 @@ def get_crazy_functions():
"Function": HotReload(Latex英文纠错加PDF对比)
}
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
"Arixv翻译输入arxivID[需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
function_plugins.update({
"本地论文翻译上传Latex压缩包[需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
# function_plugins.update({
# "本地论文翻译上传Latex压缩包 [需Latex]": {
# "Color": "stop",
# "AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder":
# "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
# "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
# "Function": HotReload(Latex翻译中文并重新编译PDF)
# }
# })
except:
print('Load function plugin failed')
@ -404,4 +397,18 @@ def get_crazy_functions():
# except:
# print('Load function plugin failed')
# try:
# from crazy_functions.虚空终端 import 终端
# function_plugins.update({
# "超级终端": {
# "Color": "stop",
# "AsButton": False,
# # "AdvancedArgs": True,
# # "ArgsReminder": "",
# "Function": HotReload(终端)
# }
# })
# except:
# print('Load function plugin failed')
return function_plugins

View File

@ -3,7 +3,9 @@ from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip
from functools import partial
import glob, os, requests, time
pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
ARXIV_CACHE_DIR = os.getenv("Arxiv_Cache")
# =================================== 工具函数 ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
@ -190,9 +192,9 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
if not os.path.exists(project_folder + '/merge_proofread.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)
chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->

View File

@ -195,7 +195,7 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
txt = r"C:\Users\fuqingxu\Desktop\9"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

View File

@ -1,19 +1,16 @@
from toolbox import update_ui, get_conf, trimmed_format_exc
import threading
def input_clipping(inputs, history, max_token_limit):
import numpy as np
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt):
return len(enc.encode(txt, disallowed_special=()))
def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
mode = 'input-and-history'
# 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
input_token_num = get_token_num(inputs)
if input_token_num < max_token_limit // 2:
if input_token_num < max_token_limit//2:
mode = 'only-history'
max_token_limit = max_token_limit - input_token_num
@ -21,13 +18,13 @@ def input_clipping(inputs, history, max_token_limit):
everything.extend(history)
n_token = get_token_num('\n'.join(everything))
everything_token = [get_token_num(e) for e in everything]
delta = max(everything_token) // 16 # 截断时的颗粒度
delta = max(everything_token) // 16 # 截断时的颗粒度
while n_token > max_token_limit:
where = np.argmax(everything_token)
encoded = enc.encode(everything[where], disallowed_special=())
clipped_encoded = encoded[:len(encoded) - delta]
everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
clipped_encoded = encoded[:len(encoded)-delta]
everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
everything_token[where] = get_token_num(everything[where])
n_token = get_token_num('\n'.join(everything))
@ -38,13 +35,12 @@ def input_clipping(inputs, history, max_token_limit):
history = everything[1:]
return inputs, history
def request_gpt_model_in_new_thread_with_ui_alive(
inputs, inputs_show_user, llm_kwargs,
inputs, inputs_show_user, llm_kwargs,
chatbot, history, sys_prompt, refresh_interval=0.2,
handle_token_exceed=True,
handle_token_exceed=True,
retry_times_at_unknown_error=2,
):
):
"""
Request GPT model请求GPT模型同时维持用户界面活跃。
@ -68,16 +64,15 @@ def request_gpt_model_in_new_thread_with_ui_alive(
from request_llm.bridge_all import predict_no_ui_long_connection
# 用户反馈
chatbot.append([inputs_show_user, ""])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
executor = ThreadPoolExecutor(max_workers=16)
mutable = ["", time.time(), ""]
def _req_gpt(inputs, history, sys_prompt):
retry_op = retry_times_at_unknown_error
exceeded_cnt = 0
while True:
# watchdog error
if len(mutable) >= 2 and (time.time() - mutable[1]) > 5:
if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
raise RuntimeError("检测到程序终止。")
try:
# 【第一种情况】:顺利完成
@ -94,14 +89,14 @@ def request_gpt_model_in_new_thread_with_ui_alive(
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
MAX_TOKEN = 4096
EXCEED_ALLO = 512 + 512 * exceeded_cnt
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
mutable[0] += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
continue # 返回重试
continue # 返回重试
else:
# 【选择放弃】
tb_str = '```\n' + trimmed_format_exc() + '```'
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
return mutable[0] # 放弃
return mutable[0] # 放弃
except:
# 【第三种情况】:其他错误:重试几次
tb_str = '```\n' + trimmed_format_exc() + '```'
@ -109,15 +104,14 @@ def request_gpt_model_in_new_thread_with_ui_alive(
mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if retry_op > 0:
retry_op -= 1
mutable[
0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}\n\n"
mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}\n\n"
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
time.sleep(30)
time.sleep(5)
continue # 返回重试
continue # 返回重试
else:
time.sleep(5)
return mutable[0] # 放弃
return mutable[0] # 放弃
# 提交任务
future = executor.submit(_req_gpt, inputs, history, sys_prompt)
@ -129,21 +123,21 @@ def request_gpt_model_in_new_thread_with_ui_alive(
if future.done():
break
chatbot[-1] = [chatbot[-1][0], mutable[0]]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
final_result = future.result()
chatbot[-1] = [chatbot[-1][0], final_result]
yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息
yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息
return final_result
def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array, inputs_show_user_array, llm_kwargs,
chatbot, history_array, sys_prompt_array,
inputs_array, inputs_show_user_array, llm_kwargs,
chatbot, history_array, sys_prompt_array,
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
handle_token_exceed=True, show_user_at_complete=False,
retry_times_at_unknown_error=2,
):
):
"""
Request GPT model using multiple threads with UI and high efficiency
请求GPT模型的[多线程]版。
@ -176,21 +170,19 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
from request_llm.bridge_all import predict_no_ui_long_connection
assert len(inputs_array) == len(history_array)
assert len(inputs_array) == len(sys_prompt_array)
if max_workers == -1: # 读取配置文件
try:
max_workers, = get_conf('DEFAULT_WORKER_NUM')
except:
max_workers = 8
if max_workers == -1: # 读取配置文件
try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
except: max_workers = 8
if max_workers <= 0: max_workers = 3
# 屏蔽掉 chatglm的多线程可能会导致严重卡顿
if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
max_workers = 1
executor = ThreadPoolExecutor(max_workers=max_workers)
n_frag = len(inputs_array)
# 用户反馈
chatbot.append(["请开始多线程操作。", ""])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
# 跨线程传递
mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
@ -202,13 +194,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
mutable[index][2] = "执行中"
while True:
# watchdog error
if len(mutable[index]) >= 2 and (time.time() - mutable[index][1]) > 5:
if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
raise RuntimeError("检测到程序终止。")
try:
# 【第一种情况】:顺利完成
# time.sleep(10); raise RuntimeError("测试")
gpt_say = predict_no_ui_long_connection(
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
inputs=inputs, llm_kwargs=llm_kwargs, history=history,
sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
)
mutable[index][2] = "已成功"
@ -222,26 +214,24 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
MAX_TOKEN = 4096
EXCEED_ALLO = 512 + 512 * exceeded_cnt
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
gpt_say += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
mutable[index][2] = f"截断重试"
continue # 返回重试
continue # 返回重试
else:
# 【选择放弃】
tb_str = '```\n' + trimmed_format_exc() + '```'
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
mutable[index][2] = "输入过长已放弃"
return gpt_say # 放弃
except Exception as e:
return gpt_say # 放弃
except:
# 【第三种情况】:其他错误
tb_str = '```\n' + trimmed_format_exc() + '```'
print(f"发生异常:{e}, 调用栈信息:{tb_str}")
print(tb_str)
gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
if len(mutable[index][0]) > 0:
gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if retry_op > 0:
if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
if retry_op > 0:
retry_op -= 1
wait = random.randint(5, 20)
if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
@ -251,22 +241,19 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
fail_info = ""
# 也许等待十几秒后,情况会好转
for i in range(wait):
mutable[index][2] = f"{fail_info}等待重试 {wait - i}";
time.sleep(1)
mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
# 开始重试
mutable[index][
2] = f"重试中 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试
mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
continue # 返回重试
else:
mutable[index][2] = "已失败"
wait = 5
time.sleep(5)
return gpt_say # 放弃
return gpt_say # 放弃
# 异步任务开始
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in
zip(
range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
cnt = 0
while True:
# yield一次以刷新前端页面
@ -280,17 +267,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
mutable[thread_index][1] = time.time()
# 在前端打印些好玩的东西
for thread_index, _ in enumerate(worker_done):
print_something_really_funny = "[ ...`" + mutable[thread_index][0][-scroller_max_len:]. \
print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
replace('\n', '').replace('```', '...').replace(
' ', '.').replace('<br/>', '.....').replace('$', '.') + "`... ]"
' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
observe_win.append(print_something_really_funny)
# 在前端打印些好玩的东西
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
if not done else f'`{mutable[thread_index][2]}`\n\n'
stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
if not done else f'`{mutable[thread_index][2]}`\n\n'
for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
# 在前端打印些好玩的东西
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.'] * (cnt % 10 + 1))]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
if all(worker_done):
executor.shutdown()
break
@ -300,13 +287,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
for inputs_show_user, f in zip(inputs_show_user_array, futures):
gpt_res = f.result()
gpt_response_collection.extend([inputs_show_user, gpt_res])
# 是否在结束时,在界面上显示结果
if show_user_at_complete:
for inputs_show_user, f in zip(inputs_show_user_array, futures):
gpt_res = f.result()
chatbot.append([inputs_show_user, gpt_res])
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
time.sleep(0.3)
return gpt_response_collection
@ -319,7 +306,6 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
lines = txt_tocut.split('\n')
estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
estimated_line_cut = int(estimated_line_cut)
cnt = 0
for cnt in reversed(range(estimated_line_cut)):
if must_break_at_empty_line:
if lines[cnt] != "":
@ -336,7 +322,6 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
result = [prev]
result.extend(cut(post, must_break_at_empty_line))
return result
try:
return cut(txt, must_break_at_empty_line=True)
except RuntimeError:
@ -352,10 +337,9 @@ def force_breakdown(txt, limit, get_token_fn):
return txt[:i], txt[i:]
return "Tiktoken未知错误", "Tiktoken未知错误"
def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
# 递归
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
if get_token_fn(txt_tocut) <= limit:
return [txt_tocut]
else:
@ -381,7 +365,6 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
result = [prev]
result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
return result
try:
# 第1次尝试将双空行\n\n作为切分点
return cut(txt, must_break_at_empty_line=True)
@ -392,7 +375,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
except RuntimeError:
try:
# 第3次尝试将英文句号.)作为切分点
res = cut(txt.replace('.', '\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
res = cut(txt.replace('.', '\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
return [r.replace('\n', '.') for r in res]
except RuntimeError as e:
try:
@ -404,6 +387,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
return cut(txt, must_break_at_empty_line=False, break_anyway=True)
def read_and_clean_pdf_text(fp):
"""
这个函数用于分割pdf用了很多trick逻辑较乱效果奇好
@ -431,9 +415,8 @@ def read_and_clean_pdf_text(fp):
fc = 0 # Index 0 文本
fs = 1 # Index 1 字体
fb = 2 # Index 2 框框
REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
def primary_ffsize(l):
"""
提取文本块主字体
@ -443,12 +426,12 @@ def read_and_clean_pdf_text(fp):
if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
fsize_statiscs[wtf['size']] += len(wtf['text'])
return max(fsize_statiscs, key=fsize_statiscs.get)
def ffsize_same(a, b):
def ffsize_same(a,b):
"""
提取字体大小是否近似相等
"""
return abs((a - b) / max(a, b)) < 0.02
return abs((a-b)/max(a,b)) < 0.02
with fitz.open(fp) as doc:
meta_txt = []
@ -468,19 +451,18 @@ def read_and_clean_pdf_text(fp):
if len(txt_line) == 0: continue
pf = primary_ffsize(l)
meta_line.append([txt_line, pf, l['bbox'], l])
for wtf in l['spans']: # for l in t['lines']:
for wtf in l['spans']: # for l in t['lines']:
meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
# meta_line.append(["NEW_BLOCK", pf])
# 块元提取 for each word segment with in line for each line
# cross-line words for each block
# 块元提取 for each word segment with in line for each line cross-line words for each block
meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t])
meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
if index == 0:
page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
'- ', '') for t in text_areas['blocks'] if 'lines' in t]
############################## <第 2 步,获取正文主字体> ##################################
fsize_statiscs = {}
for span in meta_span:
@ -494,33 +476,32 @@ def read_and_clean_pdf_text(fp):
mega_sec = []
sec = []
for index, line in enumerate(meta_line):
if index == 0:
if index == 0:
sec.append(line[fc])
continue
if REMOVE_FOOT_NOTE:
if meta_line[index][fs] <= give_up_fize_threshold:
continue
if ffsize_same(meta_line[index][fs], meta_line[index - 1][fs]):
if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
# 尝试识别段落
if meta_line[index][fc].endswith('.') and \
(meta_line[index - 1][fc] != 'NEW_BLOCK') and \
(meta_line[index][fb][2] - meta_line[index][fb][0]) < (
meta_line[index - 1][fb][2] - meta_line[index - 1][fb][0]) * 0.7:
if meta_line[index][fc].endswith('.') and\
(meta_line[index-1][fc] != 'NEW_BLOCK') and \
(meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
sec[-1] += line[fc]
sec[-1] += "\n\n"
else:
sec[-1] += " "
sec[-1] += line[fc]
else:
if (index + 1 < len(meta_line)) and \
meta_line[index][fs] > main_fsize:
if (index+1 < len(meta_line)) and \
meta_line[index][fs] > main_fsize:
# 单行 + 字体大
mega_sec.append(copy.deepcopy(sec))
sec = []
sec.append("# " + line[fc])
else:
# 尝试识别section
if meta_line[index - 1][fs] > meta_line[index][fs]:
if meta_line[index-1][fs] > meta_line[index][fs]:
sec.append("\n" + line[fc])
else:
sec.append(line[fc])
@ -539,15 +520,13 @@ def read_and_clean_pdf_text(fp):
if len(block_txt) < 100:
meta_txt[index] = '\n'
return meta_txt
meta_txt = 把字符太少的块清除为回车(meta_txt)
def 清理多余的空行(meta_txt):
for index in reversed(range(1, len(meta_txt))):
if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n':
if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
meta_txt.pop(index)
return meta_txt
meta_txt = 清理多余的空行(meta_txt)
def 合并小写开头的段落块(meta_txt):
@ -558,18 +537,16 @@ def read_and_clean_pdf_text(fp):
return True
else:
return False
for _ in range(100):
for index, block_txt in enumerate(meta_txt):
if starts_with_lowercase_word(block_txt):
if meta_txt[index - 1] != '\n':
meta_txt[index - 1] += ' '
if meta_txt[index-1] != '\n':
meta_txt[index-1] += ' '
else:
meta_txt[index - 1] = ''
meta_txt[index - 1] += meta_txt[index]
meta_txt[index-1] = ''
meta_txt[index-1] += meta_txt[index]
meta_txt[index] = '\n'
return meta_txt
meta_txt = 合并小写开头的段落块(meta_txt)
meta_txt = 清理多余的空行(meta_txt)
@ -589,7 +566,7 @@ def read_and_clean_pdf_text(fp):
return meta_txt, page_one_meta
def get_files_from_everything(txt, type): # type='.md'
def get_files_from_everything(txt, type): # type='.md'
"""
这个函数是用来获取指定目录下所有指定类型(如.md的文件并且对于网络上的文件也可以获取它。
下面是对每个参数和返回值的说明:
@ -611,10 +588,9 @@ def get_files_from_everything(txt, type): # type='.md'
from toolbox import get_conf
proxies, = get_conf('proxies')
r = requests.get(txt, proxies=proxies)
with open('./gpt_log/temp' + type, 'wb+') as f:
f.write(r.content)
with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
project_folder = './gpt_log/'
file_manifest = ['./gpt_log/temp' + type]
file_manifest = ['./gpt_log/temp'+type]
elif txt.endswith(type):
# 直接给定文件
file_manifest = [txt]
@ -622,7 +598,7 @@ def get_files_from_everything(txt, type): # type='.md'
elif os.path.exists(txt):
# 本地路径,递归搜索
project_folder = txt
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*' + type, recursive=True)]
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
if len(file_manifest) == 0:
success = False
else:
@ -633,14 +609,16 @@ def get_files_from_everything(txt, type): # type='.md'
return success, file_manifest, project_folder
def Singleton(cls):
    """
    Class decorator that makes ``cls`` a lazily created singleton.

    The first call through the decorated name constructs the instance with
    the arguments supplied to that call. Every later call returns the same
    instance; arguments passed to later calls are ignored.

    Args:
        cls: The class (or any callable) whose single instance is cached.

    Returns:
        A factory function that returns the one shared instance of ``cls``.
    """
    import functools

    _instance = {}

    # updated=() copies only the identifying metadata (__name__, __doc__, ...)
    # onto the factory; the class __dict__ is deliberately not merged into it.
    @functools.wraps(cls, updated=())
    def _singleton(*args, **kwargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]

    return _singleton
@ -659,30 +637,31 @@ class knowledge_archive_interface():
from toolbox import ProxyNetworkActivate
print('Checking Text2vec ...')
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
with ProxyNetworkActivate(): # 临时地激活代理网络
with ProxyNetworkActivate(): # 临时地激活代理网络
self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
return self.text2vec_large_chinese
def feed_archive(self, file_manifest, id="default"):
    """
    Build a vector store from ``file_manifest`` and make it the current archive.

    Stores ``id`` in ``self.current_id`` and stores the two values returned by
    ``construct_vector_store`` in ``self.qa_handle`` and ``self.kai_path``.

    Args:
        file_manifest: Files handed to ``construct_vector_store`` as ``files``.
        id: Identifier passed to ``construct_vector_store`` as ``vs_id``.
    """
    self.threadLock.acquire()
    try:
        self.current_id = id
        from zh_langchain import construct_vector_store
        self.qa_handle, self.kai_path = construct_vector_store(
            vs_id=self.current_id,
            files=file_manifest,
            sentence_size=100,
            history=[],
            one_conent="",
            one_content_segmentation="",
            text2vec=self.get_chinese_text2vec(),
        )
    finally:
        # Always release, otherwise one failed build would block every later
        # caller that tries to acquire the lock.
        self.threadLock.release()
def get_current_archive_id(self):
return self.current_id
def get_loaded_file(self):
return self.qa_handle.get_loaded_file()
@ -691,31 +670,30 @@ class knowledge_archive_interface():
if not self.current_id == id:
self.current_id = id
from zh_langchain import construct_vector_store
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=[],
self.qa_handle, self.kai_path = construct_vector_store(
vs_id=self.current_id,
files=[],
sentence_size=100,
history=[],
one_conent="",
one_content_segmentation="",
text2vec=self.get_chinese_text2vec(),
text2vec = self.get_chinese_text2vec(),
)
VECTOR_SEARCH_SCORE_THRESHOLD = 0
VECTOR_SEARCH_TOP_K = 4
CHUNK_SIZE = 512
resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
query=txt,
vs_path=self.kai_path,
query = txt,
vs_path = self.kai_path,
score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
vector_search_top_k=VECTOR_SEARCH_TOP_K,
vector_search_top_k=VECTOR_SEARCH_TOP_K,
chunk_conent=True,
chunk_size=CHUNK_SIZE,
text2vec=self.get_chinese_text2vec(),
text2vec = self.get_chinese_text2vec(),
)
self.threadLock.release()
return resp, prompt
def try_install_deps(deps):
for dep in deps:
import subprocess, sys

View File

@ -203,7 +203,6 @@ def merge_tex_files_(project_foler, main_file, mode):
c = fx.read()
else:
# e.g., \input{srcs/07_appendix}
assert os.path.exists(fp+'.tex'), f'即找不到{fp},也找不到{fp}.texTex源文件缺失'
with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
c = fx.read()
c = merge_tex_files_(project_foler, c, mode)
@ -450,9 +449,10 @@ class LatexPaperSplit():
"""
def __init__(self) -> None:
self.nodes = None
self.msg = "*{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
self.msg = "{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
"项目Github地址: \\url{https://github.com/binary-husky/gpt_academic/}。" + \
"项目在线体验地址: \\url{https://chatpaper.org}。"
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"

View File

@ -27,10 +27,8 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
}
response = requests.post(url, headers=headers, json=data, proxies=proxies)
print(response.content)
try:
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
except:
raise RuntimeError(response.content.decode())
image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
# 文件保存到本地
r = requests.get(image_url, proxies=proxies)
file_path = 'gpt_log/image_gen/'

View File

@ -1,78 +1,67 @@
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
fast_debug = False
def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import time
import os
import time, glob, os
print('begin analysis on:', file_manifest)
for index, fp in enumerate(file_manifest):
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs,
chatbot, history=[],
sys_prompt=system_prompt) # 带超时倒计时
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say_show_user, gpt_say)
history.append(i_say_show_user);
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
history.append(i_say_show_user); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
if not fast_debug: time.sleep(2)
all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
chatbot.append((i_say, "[Local Message] waiting gpt response."))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if not fast_debug:
if not fast_debug:
msg = '正常'
# ** gpt request **
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot,
history=history,
sys_prompt=system_prompt) # 带超时倒计时
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时
chatbot[-1] = (i_say, gpt_say)
history.append(i_say)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
history.append(i_say); history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
res = write_results_to_file(history)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
@CatchException
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, system_prompt, web_port, history=None):
# history = [] # 清空历史,以免输入溢出
if history is None:
history = [] # 清空历史,以免输入溢出
import glob
import os
def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "":
txt = '空空如也的输入栏'
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
# [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
# [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@ -1,28 +0,0 @@
# encoding: utf-8
# @Time : 2023/4/19
# @Author : Spike
# @Descr :
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
@CatchException
def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Ask the model to answer and then list three likely follow-up questions.

    When ``txt`` is non-empty it is sent with an instruction to append three
    follow-up questions. When ``txt`` is empty, the last entry of ``history``
    is analysed instead. When both are empty there is nothing to analyse, so
    the problem is reported in the chat and the plugin stops.

    Args:
        txt: Text from the input box; may be empty.
        llm_kwargs: Forwarded unchanged to the request helper.
        plugin_kwargs: Unused by this plugin.
        chatbot: Chat display object updated with the result.
        history: Conversation history; extended with the new exchange.
        system_prompt: Forwarded to the request helper as ``sys_prompt``.
        web_port: Unused by this plugin.
    """
    if txt:
        show_say = txt
        prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
    elif history:
        prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
    else:
        # Previously this path hit history[-1] on an empty list (IndexError).
        report_execption(chatbot, history, a="猜你想问", b="输入栏为空且没有历史对话,无法列出可能的问题。")
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=show_say,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=history,
        sys_prompt=system_prompt
    )
    chatbot[-1] = (show_say, gpt_say)
    history.extend([show_say, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI

View File

@ -96,15 +96,6 @@
● 部署名(不是模型名)
# 修改 config.py
```
AZURE_ENDPOINT = "填入终结点"
AZURE_API_KEY = "填入azure openai api的密钥"
AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改
AZURE_ENGINE = "填入部署名"
```
# API的使用
接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)

38
main.py
View File

@ -1,4 +1,5 @@
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
from pathlib import Path
def main():
import gradio as gr
@ -6,8 +7,8 @@ def main():
from request_llm.bridge_all import predict
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \
get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS')
# 如果WEB_PORT是-1, 则随机选取WEB端口
PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
@ -15,7 +16,7 @@ def main():
from check_proxy import get_current_version
initial_prompt = "Serve me as a writing and programming assistant."
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>"
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 网页测试版 {get_current_version()}</h1>"
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
# 问询记录, python 版本建议3.9+(越新越好)
@ -52,7 +53,21 @@ def main():
CHATBOT_HEIGHT /= 2
cancel_handles = []
# Read your Baidu statistics code from the file
baidu_stats_code = Path('./sites/baidu_stats.html').read_text()
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
# Insert your Baidu statistics code here
gradio_original_template_fn = gr.routes.templates.TemplateResponse
def gradio_new_template_fn(*args, **kwargs):
res = gradio_original_template_fn(*args, **kwargs)
res.body = res.body.replace(b'</html>', f'{baidu_stats_code}</html>'.encode("utf8"))
res.init_headers()
return res
gr.routes.templates.TemplateResponse = gradio_new_template_fn # override gradio template
# Insert Title
gr.HTML(title_html)
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr_L1():
@ -71,7 +86,13 @@ def main():
stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
with gr.Row():
status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行当前模型: {LLM_MODEL} \n {proxy_info}")
status = gr.Markdown(f"""Tips: 1. 按Enter提交, 按Shift+Enter换行2. 当前模型: {LLM_MODEL} \n {proxy_info}.
3. 请注意隐私保护和遵守法律法规;
4. 请勿使用本服务进行违法犯罪活动;
5. 我和qingxu都希望能够为大家提供一个好的**学术工具**,希望大家不要攻击和滥用本服务;
6. 本服务还存在各种bug如果发现bug欢迎加群反馈或者发issue告诉我们
7. 希望大家能结合ChatPaper的速读找到需要精读的再用本工具的全文翻译实现快速知识摄取。
""")
with gr.Accordion("基础功能区", open=True) as area_basic_fn:
with gr.Row():
for k in functional:
@ -104,7 +125,7 @@ def main():
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
@ -144,11 +165,6 @@ def main():
resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
clearBtn.click(lambda: ("",""), None, [txt, txt2])
clearBtn2.click(lambda: ("",""), None, [txt, txt2])
if AUTO_CLEAR_TXT:
submitBtn.click(lambda: ("",""), None, [txt, txt2])
submitBtn2.click(lambda: ("",""), None, [txt, txt2])
txt.submit(lambda: ("",""), None, [txt, txt2])
txt2.submit(lambda: ("",""), None, [txt, txt2])
# 基础功能区的回调函数注册
for k in functional:
if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
@ -172,6 +188,7 @@ def main():
ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
return ret
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
def on_md_dropdown_changed(k):
return {chatbot: gr.update(label="当前模型:"+k)}
md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
@ -185,6 +202,7 @@ def main():
# 终止按钮的回调函数注册
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
demo.load(on_dropdown_changed, inputs=gr.State("ArXiv Latex一键翻译输入区给定arXiv ID"), outputs=[switchy_bt, plugin_advanced_arg])
# gradio的inbrowser触发不太稳定回滚代码到原始的浏览器打开函数
def auto_opentab_delay():

View File

@ -152,7 +152,7 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
# chatglm 直接对齐到 chatglm2
# chatglm
"chatglm": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
@ -161,15 +161,6 @@ model_info = {
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
"chatglm2": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
"endpoint": None,
"max_token": 1024,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
# newbing
"newbing": {
"fn_with_ui": newbing_ui,

View File

@ -40,12 +40,12 @@ class GetGLMHandle(Process):
while True:
try:
if self.chatglm_model is None:
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
device, = get_conf('LOCAL_MODEL_DEVICE')
if device=='cpu':
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
else:
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = self.chatglm_model.eval()
break
else:

View File

@ -28,7 +28,6 @@ proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
'网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
def get_full_error(chunk, stream_response):
"""
获取完整的从Openai返回的报错
@ -41,9 +40,7 @@ def get_full_error(chunk, stream_response):
return chunk
def predict_no_ui_long_connection(
inputs, llm_kwargs, history=None, sys_prompt="", observe_window=None, console_slience=False
):
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
"""
发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
inputs
@ -57,59 +54,45 @@ def predict_no_ui_long_connection(
observe_window = None
用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
"""
if history is None:
history = []
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
retry = 0
from bridge_all import model_info
while True:
try:
# make a POST request to the API endpoint, stream=False
from .bridge_all import model_info
endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
response = requests.post(endpoint, headers=headers, proxies=proxies,
json=payload, stream=True, timeout=TIMEOUT_SECONDS)
stream_response = response.iter_lines()
break
except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
except requests.exceptions.ReadTimeout as e:
retry += 1
traceback.print_exc()
if retry > MAX_RETRY:
raise TimeoutError
if MAX_RETRY != 0:
print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
except Exception as e:
print(f"出现异常:{e}")
raise e
if retry > MAX_RETRY: raise TimeoutError
if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
stream_response = response.iter_lines()
result = ''
while True:
try:
chunk = next(stream_response).decode()
try: chunk = next(stream_response).decode()
except StopIteration:
break
# except requests.exceptions.ConnectionError:
# chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk) == 0:
continue
except requests.exceptions.ConnectionError:
chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
if len(chunk)==0: continue
if not chunk.startswith('data:'):
error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
if "reduce the length" in error_msg:
raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
else:
raise RuntimeError("OpenAI拒绝了请求" + error_msg)
if 'data: [DONE]' in chunk:
break # api2d 正常完成
if ('data: [DONE]' in chunk): break # api2d 正常完成
json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
delta = json_data["delta"]
if len(delta) == 0:
break
if "role" in delta:
continue
if len(delta) == 0: break
if "role" in delta: continue
if "content" in delta:
result += delta["content"]
if not console_slience:
print(delta["content"], end='')
if not console_slience: print(delta["content"], end='')
if observe_window is not None:
# 观测窗,把已经获取的数据显示出去
if len(observe_window) >= 1: observe_window[0] += delta["content"]
@ -117,8 +100,7 @@ def predict_no_ui_long_connection(
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("用户取消了程序。")
else:
raise RuntimeError("意外Json结构"+delta)
else: raise RuntimeError("意外Json结构"+delta)
if json_data['finish_reason'] == 'length':
raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
return result
@ -246,7 +228,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
return
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
"""
整合所有信息选择LLM模型生成http请求为发送请求做准备
@ -266,19 +247,23 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
messages = [{"role": "system", "content": system_prompt}]
if conversation_cnt:
for index in range(0, 2*conversation_cnt, 2):
what_i_have_asked = {"role": "user", "content": history[index]}
what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
what_i_have_asked = {}
what_i_have_asked["role"] = "user"
what_i_have_asked["content"] = history[index]
what_gpt_answer = {}
what_gpt_answer["role"] = "assistant"
what_gpt_answer["content"] = history[index+1]
if what_i_have_asked["content"] != "":
if what_gpt_answer["content"] == "":
continue
if what_gpt_answer["content"] == timeout_bot_msg:
continue
if what_gpt_answer["content"] == "": continue
if what_gpt_answer["content"] == timeout_bot_msg: continue
messages.append(what_i_have_asked)
messages.append(what_gpt_answer)
else:
messages[-1]['content'] = what_gpt_answer['content']
what_i_ask_now = {"role": "user", "content": inputs}
what_i_ask_now = {}
what_i_ask_now["role"] = "user"
what_i_ask_now["content"] = inputs
messages.append(what_i_ask_now)
payload = {
@ -293,8 +278,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
}
try:
print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
except Exception as e:
print(f'输入中可能存在乱码。抛出异常: {e}')
return headers, payload
except:
print('输入中可能存在乱码。')
return headers,payload

View File

@ -1,4 +1,4 @@
gradio>=3.33.1
./docs/gradio-3.32.2-py3-none-any.whl
tiktoken>=0.3.3
requests[socks]
transformers
@ -15,6 +15,4 @@ pymupdf
openai
numpy
arxiv
rich
langchain
zh_langchain
rich

10
sites/baidu_stats.html Normal file
View File

@ -0,0 +1,10 @@
<!-- baidu_stats.html -->
<!-- Snippet that loads the Baidu hm.js script. main.py reads this file and
     inserts its contents just before the closing html tag of the page. -->
<script>
// Reuse a global _hmt array if one already exists, otherwise start empty
// (presumably the queue hm.js reads; confirm against Baidu's docs).
var _hmt = _hmt || [];
(function() {
// Create the script element that points at hm.js for this site id.
var hm = document.createElement("script");
hm.src = "https://hm.baidu.com/hm.js?208673d55832a94b9bbe10b1f4e70c09";
// Insert it in front of the first script tag already in the document.
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>

View File

@ -21,7 +21,6 @@ pj = os.path.join
========================================================================
"""
class ChatBotWithCookies(list):
def __init__(self, cookie):
self._cookies = cookie
@ -72,13 +71,11 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面
assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时可用clear将其清空然后用for+append循环重新赋值。"
yield chatbot.get_cookies(), chatbot, history, msg
def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
"""
刷新用户界面
"""
if len(chatbot) == 0:
chatbot.append(["update_ui_last_msg", lastmsg])
if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
chatbot[-1] = list(chatbot[-1])
chatbot[-1][-1] = lastmsg
yield from update_ui(chatbot=chatbot, history=history)
@ -86,25 +83,24 @@ def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
def trimmed_format_exc():
    """
    Return the current traceback text with the working directory shortened.

    Formats the exception currently being handled with
    ``traceback.format_exc()`` and replaces every occurrence of the current
    working directory with ``"."``, so absolute local paths are not shown.

    Returns:
        The formatted traceback as a string.
    """
    import os
    import traceback

    # Named ``formatted`` rather than ``str`` so the builtin is not shadowed.
    formatted = traceback.format_exc()
    current_path = os.getcwd()
    replace_path = "."
    return formatted.replace(current_path, replace_path)
def CatchException(f):
"""
装饰器函数捕捉函数f中的异常并封装到一个生成器中返回并显示到聊天当中。
"""
@wraps(f)
def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
try:
yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
except Exception as e:
from check_proxy import check_proxy
# from toolbox import get_conf # 不需要导入本文件内容
from toolbox import get_conf
proxies, = get_conf('proxies')
tb_str = '```\n' + trimmed_format_exc() + '```'
if len(chatbot) == 0:
@ -112,7 +108,7 @@ def CatchException(f):
chatbot.append(["插件调度异常", "异常原因"])
chatbot[-1] = (chatbot[-1][0],
f"[Local Message] 实验性函数调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
yield from update_ui(chatbot=chatbot, history=history, msg=f'异常 {e}') # 刷新界面
yield from update_ui(chatbot=chatbot, history=history, msg=f'异常 {e}') # 刷新界面
return decorated
@ -152,7 +148,6 @@ def HotReload(f):
========================================================================
"""
def get_reduce_token_percent(text):
"""
* 此函数未来将被弃用
@ -212,6 +207,8 @@ def regular_txt_to_markdown(text):
return text
def report_execption(chatbot, history, a, b):
"""
向chatbot中添加错误信息
@ -241,7 +238,6 @@ def text_divide_paragraph(text):
text = "</br>".join(lines)
return pre + text + suf
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):
"""
@ -444,7 +440,6 @@ def find_recent_files(directory):
return recent_files
def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区
import shutil
@ -457,7 +452,6 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
else: current = []
chatbot._cookies.update({'file_to_promote': [new_path] + current})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
"""
当文件被上传时的回调函数
@ -511,20 +505,17 @@ def on_report_generated(cookies, files, chatbot):
chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
return cookies, report_files, chatbot
def is_openai_api_key(key):
    """
    Check whether ``key`` has one of the two accepted key formats.

    Accepted formats are ``sk-`` followed by exactly 48 ASCII letters or
    digits, or exactly 32 ASCII letters or digits (the form the original
    code labels as Azure).

    The whole string must match: ``re.fullmatch`` is used because ``$`` with
    ``re.match`` would also accept a key followed by a trailing newline.

    Args:
        key: Candidate key string.

    Returns:
        True if ``key`` matches either format, otherwise False.
    """
    matches_original = re.fullmatch(r"sk-[a-zA-Z0-9]{48}", key)
    matches_azure = re.fullmatch(r"[a-zA-Z0-9]{32}", key)
    return bool(matches_original) or bool(matches_azure)
def is_api2d_key(key):
    """
    Check whether ``key`` has the API2D key shape.

    Args:
        key: Candidate key string.

    Returns:
        True if ``key`` starts with ``fk`` and is exactly 41 characters long,
        otherwise False.
    """
    return key.startswith('fk') and len(key) == 41
def is_any_api_key(key):
if ',' in key:
keys = key.split(',')
@ -534,7 +525,6 @@ def is_any_api_key(key):
else:
return is_openai_api_key(key) or is_api2d_key(key)
def what_keys(keys):
avail_key_list = {'OpenAI Key':0, "API2D Key":0}
key_list = keys.split(',')
@ -549,7 +539,6 @@ def what_keys(keys):
return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']}API2D Key {avail_key_list['API2D Key']}"
def select_api_key(keys, llm_model):
import random
avail_key_list = []
@ -569,7 +558,6 @@ def select_api_key(keys, llm_model):
api_key = random.choice(avail_key_list) # 随机负载均衡
return api_key
def read_env_variable(arg, default_value):
"""
环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG`
@ -624,7 +612,6 @@ def read_env_variable(arg, default_value):
print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
return r
@lru_cache(maxsize=128)
def read_single_conf_with_lru_cache(arg):
from colorful import print亮红, print亮绿, print亮蓝
@ -689,7 +676,6 @@ class DummyWith():
def __exit__(self, exc_type, exc_value, traceback):
return
def run_gradio_in_subpath(demo, auth, port, custom_path):
"""
把gradio的运行地址更改到指定的二次路径上
@ -784,7 +770,6 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
========================================================================
"""
def zip_folder(source_folder, dest_folder, zip_name):
import zipfile
import os
@ -816,7 +801,6 @@ def zip_folder(source_folder, dest_folder, zip_name):
print(f"Zip file created at {zip_file}")
def zip_result(folder):
import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
@ -827,7 +811,6 @@ def gen_time_str():
import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
class ProxyNetworkActivate():
"""
这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@ -847,14 +830,12 @@ class ProxyNetworkActivate():
if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
return
def objdump(obj, file='objdump.tmp'):
    """
    Pickle ``obj`` into ``file``, overwriting any existing content.

    Args:
        obj: Object to serialise with ``pickle``.
        file: Destination path; defaults to ``objdump.tmp``.

    Returns:
        None.
    """
    from pickle import dump as pickle_dump

    with open(file, 'wb+') as sink:
        pickle_dump(obj, sink)
def objload(file='objdump.tmp'):
import pickle, os
if not os.path.exists(file):