Compare commits
28 Commits
proxy_bran...huggingfac
| SHA1 |
|---|
| 96c1852abc |
| cd145c0794 |
| 7a4d4ad956 |
| 9f9848c6e9 |
| 94425c49fd |
| e874a16050 |
| c28388c5fe |
| b4a56d391b |
| 7075092f86 |
| 1086ff8092 |
| 3a22446b47 |
| 7842cf03cc |
| 54f55c32f2 |
| 94318ff0a2 |
| 5be6b83762 |
| 6f18d1716e |
| 90944bd744 |
| 752937cb70 |
| c584cbac5b |
| 309d12b404 |
| 52ea0acd61 |
| 9f5e3e0fd5 |
| 315e78e5d9 |
| b6b4ba684a |
| 2281a5ca7f |
| 49558686f2 |
| b050ccedb5 |
| ae56cab6f4 |
README.md (12)
@@ -1,3 +1,15 @@
+---
+title: ChatImprovement
+emoji: 😻
+colorFrom: blue
+colorTo: blue
+sdk: gradio
+sdk_version: 3.32.0
+app_file: app.py
+pinned: false
+---
+
+# ChatGPT 学术优化
 > **Note**
 >
 > 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**:

@@ -1,13 +1,15 @@
 import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
 
 def main():
+import subprocess, sys
+subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
 import gradio as gr
-if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt"
+if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "请用 pip install -r requirements.txt 安装依赖"
 from request_llm.bridge_all import predict
 from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
 # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
-proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS, AUTO_CLEAR_TXT = \
-get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS', 'AUTO_CLEAR_TXT')
+proxies, WEB_PORT, LLM_MODEL, CONCURRENT_COUNT, AUTHENTICATION, CHATBOT_HEIGHT, LAYOUT, API_KEY, AVAIL_LLM_MODELS = \
+get_conf('proxies', 'WEB_PORT', 'LLM_MODEL', 'CONCURRENT_COUNT', 'AUTHENTICATION', 'CHATBOT_HEIGHT', 'LAYOUT', 'API_KEY', 'AVAIL_LLM_MODELS')
 
 # 如果WEB_PORT是-1, 则随机选取WEB端口
 PORT = find_free_port() if WEB_PORT <= 0 else WEB_PORT
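For reference, a minimal sketch of the startup gate the new side adds: install the forked Gradio build, then refuse to run on an unexpected version. The package name and the version list come from the hunk above; the helper name `ensure_patched_gradio` is illustrative.

```python
import subprocess
import sys

ALLOWED_GRADIO_VERSIONS = ['3.28.3', '3.32.3']  # versions the UI code is known to accept

def ensure_patched_gradio():
    # Install the forked build at startup, then verify the version that actually imports.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
    import gradio as gr
    if gr.__version__ not in ALLOWED_GRADIO_VERSIONS:
        raise RuntimeError("please install dependencies with: pip install -r requirements.txt")
    return gr
```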
@@ -54,6 +56,7 @@ def main():
 cancel_handles = []
 with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
 gr.HTML(title_html)
+gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
 cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
 with gr_L1():
 with gr_L2(scale=2):
@@ -63,7 +66,7 @@ def main():
 with gr_L2(scale=1):
 with gr.Accordion("输入区", open=True) as area_input_primary:
 with gr.Row():
-txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
+txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
 with gr.Row():
 submitBtn = gr.Button("提交", variant="primary")
 with gr.Row():
@@ -104,7 +107,7 @@ def main():
 system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
 top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
 temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
-max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
+max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
 checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
 md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
 
@@ -144,11 +147,6 @@ def main():
 resetBtn2.click(lambda: ([], [], "已重置"), None, [chatbot, history, status])
 clearBtn.click(lambda: ("",""), None, [txt, txt2])
 clearBtn2.click(lambda: ("",""), None, [txt, txt2])
-if AUTO_CLEAR_TXT:
-submitBtn.click(lambda: ("",""), None, [txt, txt2])
-submitBtn2.click(lambda: ("",""), None, [txt, txt2])
-txt.submit(lambda: ("",""), None, [txt, txt2])
-txt2.submit(lambda: ("",""), None, [txt, txt2])
 # 基础功能区的回调函数注册
 for k in functional:
 if ("Visible" in functional[k]) and (not functional[k]["Visible"]): continue
@@ -202,10 +200,7 @@ def main():
 threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
 
 auto_opentab_delay()
-demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
-server_name="0.0.0.0", server_port=PORT,
-favicon_path="docs/logo.png", auth=AUTHENTICATION,
-blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
+demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
 
 # 如果需要在二级路径下运行
 # CUSTOM_PATH, = get_conf('CUSTOM_PATH')

@@ -1,10 +1,10 @@
 
-def check_proxy(proxies: dict):
+def check_proxy(proxies):
 import requests
-proxies_https = proxies.get('https') if proxies is not None else '无'
+proxies_https = proxies['https'] if proxies is not None else '无'
 try:
 response = requests.get("https://ipapi.co/json/",
-proxies=proxies, timeout=30)
+proxies=proxies, timeout=4)
 data = response.json()
 print(f'查询代理的地理位置,返回的结果是{data}')
 if 'country_name' in data:
@@ -12,12 +12,10 @@ def check_proxy(proxies: dict):
 result = f"代理配置 {proxies_https}, 代理所在地:{country}"
 elif 'error' in data:
 result = f"代理配置 {proxies_https}, 代理所在地:未知,IP查询频率受限"
-else:
-result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
 print(result)
 return result
-except Exception as e:
-result = f"代理 {proxies_https} 查询出现异常: {e},代理可能无效"
+except:
+result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
 print(result)
 return result
 
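A condensed, standalone sketch of the `check_proxy` behaviour on the new side (short timeout, readable fallback message); the proxy address in the usage line is only a placeholder.

```python
import requests

def check_proxy(proxies):
    # Ask a geo-IP service where the egress IP appears to be; a short timeout makes a dead proxy fail fast.
    proxies_https = proxies['https'] if proxies is not None else '无'
    try:
        data = requests.get("https://ipapi.co/json/", proxies=proxies, timeout=4).json()
        return f"代理配置 {proxies_https}, 代理所在地:{data.get('country_name', '未知')}"
    except Exception:
        return f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"

# usage with a placeholder local proxy address
print(check_proxy({'http': 'http://127.0.0.1:7890', 'https': 'http://127.0.0.1:7890'}))
```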
config.py (10)
@@ -45,10 +45,9 @@ WEB_PORT = -1
 # 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
 MAX_RETRY = 2
 
-# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
-LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
-# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# OpenAI模型选择是(gpt4现在只对申请成功的人开放)
+LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
+AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]
 
 # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
 LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
@@ -56,9 +55,6 @@ LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
 # 设置gradio的并行线程数(不需要修改)
 CONCURRENT_COUNT = 100
 
-# 是否在提交时自动清空输入框
-AUTO_CLEAR_TXT = False
-
 # 加一个live2d装饰
 ADD_WAIFU = False
 
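As the comment removed on the old side notes, `LLM_MODEL` is the default selection and must stay inside `AVAIL_LLM_MODELS`, since app.py builds its model dropdown from that list. A small self-check along these lines keeps the two settings consistent; the `validate_model_config` helper is hypothetical, not part of the repo.

```python
LLM_MODEL = "gpt-3.5-turbo"
AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]

def validate_model_config(default_model, available_models):
    # The model dropdown in app.py is built from AVAIL_LLM_MODELS, so the default must be one of its entries.
    if default_model not in available_models:
        raise ValueError(f"LLM_MODEL={default_model!r} is not listed in AVAIL_LLM_MODELS")

validate_model_config(LLM_MODEL, AVAIL_LLM_MODELS)
```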

@@ -63,7 +63,6 @@ def get_core_functions():
 "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL," +
 r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
 "Suffix": r"",
-"Visible": False,
 },
 "解释代码": {
 "Prefix": r"请解释以下代码:" + "\n```\n",
@@ -74,5 +73,6 @@ def get_core_functions():
 r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
 r"Items need to be transformed:",
 "Suffix": r"",
+"Visible": False,
 }
 }

@@ -193,9 +193,8 @@ def test_Latex():
 # txt = r"https://arxiv.org/abs/2212.10156"
 # txt = r"https://arxiv.org/abs/2211.11559"
 # txt = r"https://arxiv.org/abs/2303.08774"
-# txt = r"https://arxiv.org/abs/2303.12712"
+txt = r"https://arxiv.org/abs/2303.12712"
 # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
-txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误!
 
 
 for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

@@ -1,19 +1,16 @@
 from toolbox import update_ui, get_conf, trimmed_format_exc
 import threading
 
 
 def input_clipping(inputs, history, max_token_limit):
 import numpy as np
 from request_llm.bridge_all import model_info
 enc = model_info["gpt-3.5-turbo"]['tokenizer']
-def get_token_num(txt):
-return len(enc.encode(txt, disallowed_special=()))
+def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
 
 mode = 'input-and-history'
 # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
 input_token_num = get_token_num(inputs)
-if input_token_num < max_token_limit // 2:
+if input_token_num < max_token_limit//2:
 mode = 'only-history'
 max_token_limit = max_token_limit - input_token_num
 
@@ -21,13 +18,13 @@ def input_clipping(inputs, history, max_token_limit):
 everything.extend(history)
 n_token = get_token_num('\n'.join(everything))
 everything_token = [get_token_num(e) for e in everything]
 delta = max(everything_token) // 16 # 截断时的颗粒度
 
 while n_token > max_token_limit:
 where = np.argmax(everything_token)
 encoded = enc.encode(everything[where], disallowed_special=())
-clipped_encoded = encoded[:len(encoded) - delta]
+clipped_encoded = encoded[:len(encoded)-delta]
 everything[where] = enc.decode(clipped_encoded)[:-1] # -1 to remove the may-be illegal char
 everything_token[where] = get_token_num(everything[where])
 n_token = get_token_num('\n'.join(everything))
 
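A self-contained sketch of the clipping idea behind `input_clipping`: keep truncating the longest piece until the whole payload fits the token budget. It calls `tiktoken` directly rather than the project's `model_info` tokenizer table, so treat it as an approximation.

```python
import numpy as np
import tiktoken

def clip_to_token_limit(texts, max_token_limit):
    enc = tiktoken.get_encoding("cl100k_base")  # tokenizer family used by gpt-3.5-turbo-class models
    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))

    token_counts = [get_token_num(t) for t in texts]
    delta = max(token_counts) // 16                    # granularity of each truncation step
    while sum(token_counts) > max_token_limit and delta > 0:
        where = int(np.argmax(token_counts))           # always trim the longest piece
        encoded = enc.encode(texts[where], disallowed_special=())
        texts[where] = enc.decode(encoded[:len(encoded) - delta])
        token_counts[where] = get_token_num(texts[where])
    return texts
```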
@@ -38,13 +35,12 @@ def input_clipping(inputs, history, max_token_limit):
 history = everything[1:]
 return inputs, history
 
 
 def request_gpt_model_in_new_thread_with_ui_alive(
 inputs, inputs_show_user, llm_kwargs,
 chatbot, history, sys_prompt, refresh_interval=0.2,
 handle_token_exceed=True,
 retry_times_at_unknown_error=2,
 ):
 """
 Request GPT model,请求GPT模型同时维持用户界面活跃。
 
@@ -68,16 +64,15 @@ def request_gpt_model_in_new_thread_with_ui_alive(
 from request_llm.bridge_all import predict_no_ui_long_connection
 # 用户反馈
 chatbot.append([inputs_show_user, ""])
 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 executor = ThreadPoolExecutor(max_workers=16)
 mutable = ["", time.time(), ""]
 
 def _req_gpt(inputs, history, sys_prompt):
 retry_op = retry_times_at_unknown_error
 exceeded_cnt = 0
 while True:
 # watchdog error
-if len(mutable) >= 2 and (time.time() - mutable[1]) > 5:
+if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
 raise RuntimeError("检测到程序终止。")
 try:
 # 【第一种情况】:顺利完成
@@ -94,14 +89,14 @@ def request_gpt_model_in_new_thread_with_ui_alive(
 p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
 MAX_TOKEN = 4096
 EXCEED_ALLO = 512 + 512 * exceeded_cnt
-inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
+inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
 mutable[0] += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
 continue # 返回重试
 else:
 # 【选择放弃】
 tb_str = '```\n' + trimmed_format_exc() + '```'
 mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
 return mutable[0] # 放弃
 except:
 # 【第三种情况】:其他错误:重试几次
 tb_str = '```\n' + trimmed_format_exc() + '```'
@@ -109,15 +104,14 @@ def request_gpt_model_in_new_thread_with_ui_alive(
 mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
 if retry_op > 0:
 retry_op -= 1
-mutable[
-0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}:\n\n"
+mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}:\n\n"
 if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
 time.sleep(30)
 time.sleep(5)
 continue # 返回重试
 else:
 time.sleep(5)
 return mutable[0] # 放弃
 
 # 提交任务
 future = executor.submit(_req_gpt, inputs, history, sys_prompt)
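The retry branch in this hunk reduces to a bounded-retry loop with a longer pause on rate limits. A generic sketch of that pattern, with `call` standing in for `predict_no_ui_long_connection`:

```python
import time

def call_with_retries(call, retry_times_at_unknown_error=2):
    retry_op = retry_times_at_unknown_error
    while True:
        try:
            return call()                      # the actual request, e.g. one chat completion
        except Exception as e:
            if retry_op <= 0:
                raise                          # give up after the allowed number of retries
            retry_op -= 1
            if "Rate limit reached" in str(e) or "Too Many Requests" in str(e):
                time.sleep(30)                 # rate limits need a much longer pause
            time.sleep(5)
```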
@@ -129,21 +123,21 @@ def request_gpt_model_in_new_thread_with_ui_alive(
 if future.done():
 break
 chatbot[-1] = [chatbot[-1][0], mutable[0]]
 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 
 final_result = future.result()
 chatbot[-1] = [chatbot[-1][0], final_result]
 yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了,则删除报错信息
 return final_result
 
 
 def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 inputs_array, inputs_show_user_array, llm_kwargs,
 chatbot, history_array, sys_prompt_array,
 refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
 handle_token_exceed=True, show_user_at_complete=False,
 retry_times_at_unknown_error=2,
 ):
 """
 Request GPT model using multiple threads with UI and high efficiency
 请求GPT模型的[多线程]版。
@@ -176,21 +170,19 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 from request_llm.bridge_all import predict_no_ui_long_connection
 assert len(inputs_array) == len(history_array)
 assert len(inputs_array) == len(sys_prompt_array)
 if max_workers == -1: # 读取配置文件
-try:
-max_workers, = get_conf('DEFAULT_WORKER_NUM')
-except:
-max_workers = 8
+try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
+except: max_workers = 8
 if max_workers <= 0: max_workers = 3
 # 屏蔽掉 chatglm的多线程,可能会导致严重卡顿
 if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
 max_workers = 1
 
 executor = ThreadPoolExecutor(max_workers=max_workers)
 n_frag = len(inputs_array)
 # 用户反馈
 chatbot.append(["请开始多线程操作。", ""])
 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 # 跨线程传递
 mutable = [["", time.time(), "等待中"] for _ in range(n_frag)]
 
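A compact sketch of the worker-pool setup this hunk compresses: read `DEFAULT_WORKER_NUM` when available, fall back to a small default, and force a single worker for local models that do not tolerate concurrency. The `build_executor` helper and the config dict are illustrative only.

```python
from concurrent.futures import ThreadPoolExecutor

def build_executor(llm_model, max_workers=-1, config=None):
    if max_workers == -1:                       # -1 means "read the configured default"
        try: max_workers = int((config or {})['DEFAULT_WORKER_NUM'])
        except Exception: max_workers = 8
    if max_workers <= 0: max_workers = 3
    # local models such as chatglm are serialized to avoid severe stalls
    if not (llm_model.startswith('gpt-') or llm_model.startswith('api2d-')):
        max_workers = 1
    return ThreadPoolExecutor(max_workers=max_workers)

executor = build_executor('gpt-3.5-turbo', max_workers=-1, config={'DEFAULT_WORKER_NUM': 3})
```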
@@ -202,13 +194,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 mutable[index][2] = "执行中"
 while True:
 # watchdog error
-if len(mutable[index]) >= 2 and (time.time() - mutable[index][1]) > 5:
+if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
 raise RuntimeError("检测到程序终止。")
 try:
 # 【第一种情况】:顺利完成
 # time.sleep(10); raise RuntimeError("测试")
 gpt_say = predict_no_ui_long_connection(
 inputs=inputs, llm_kwargs=llm_kwargs, history=history,
 sys_prompt=sys_prompt, observe_window=mutable[index], console_slience=True
 )
 mutable[index][2] = "已成功"
@@ -222,26 +214,24 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
 MAX_TOKEN = 4096
 EXCEED_ALLO = 512 + 512 * exceeded_cnt
-inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
+inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
 gpt_say += f'[Local Message] 警告,文本过长将进行截断,Token溢出数:{n_exceed}。\n\n'
 mutable[index][2] = f"截断重试"
 continue # 返回重试
 else:
 # 【选择放弃】
 tb_str = '```\n' + trimmed_format_exc() + '```'
 gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
-if len(mutable[index][0]) > 0:
-gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
 mutable[index][2] = "输入过长已放弃"
 return gpt_say # 放弃
-except Exception as e:
+except:
 # 【第三种情况】:其他错误
 tb_str = '```\n' + trimmed_format_exc() + '```'
-print(f"发生异常:{e}, 调用栈信息:{tb_str}")
+print(tb_str)
 gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback:\n\n{tb_str}\n\n"
-if len(mutable[index][0]) > 0:
-gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
 if retry_op > 0:
 retry_op -= 1
 wait = random.randint(5, 20)
 if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
@@ -251,22 +241,19 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 fail_info = ""
 # 也许等待十几秒后,情况会好转
 for i in range(wait):
-mutable[index][2] = f"{fail_info}等待重试 {wait - i}";
-time.sleep(1)
+mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
 # 开始重试
-mutable[index][
-2] = f"重试中 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}"
+mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
 continue # 返回重试
 else:
 mutable[index][2] = "已失败"
 wait = 5
 time.sleep(5)
 return gpt_say # 放弃
 
 # 异步任务开始
-futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in
-zip(
-range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
+futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
+range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
 cnt = 0
 while True:
 # yield一次以刷新前端页面
@@ -280,17 +267,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 mutable[thread_index][1] = time.time()
 # 在前端打印些好玩的东西
 for thread_index, _ in enumerate(worker_done):
-print_something_really_funny = "[ ...`" + mutable[thread_index][0][-scroller_max_len:]. \
+print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
 replace('\n', '').replace('```', '...').replace(
-' ', '.').replace('<br/>', '.....').replace('$', '.') + "`... ]"
+' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
 observe_win.append(print_something_really_funny)
 # 在前端打印些好玩的东西
 stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
 if not done else f'`{mutable[thread_index][2]}`\n\n'
 for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
 # 在前端打印些好玩的东西
-chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.'] * (cnt % 10 + 1))]
+chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 if all(worker_done):
 executor.shutdown()
 break
@@ -300,13 +287,13 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
 for inputs_show_user, f in zip(inputs_show_user_array, futures):
 gpt_res = f.result()
 gpt_response_collection.extend([inputs_show_user, gpt_res])
 
 # 是否在结束时,在界面上显示结果
 if show_user_at_complete:
 for inputs_show_user, f in zip(inputs_show_user_array, futures):
 gpt_res = f.result()
 chatbot.append([inputs_show_user, gpt_res])
 yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
 time.sleep(0.3)
 return gpt_response_collection
 
@@ -319,7 +306,6 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
 lines = txt_tocut.split('\n')
 estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
 estimated_line_cut = int(estimated_line_cut)
-cnt = 0
 for cnt in reversed(range(estimated_line_cut)):
 if must_break_at_empty_line:
 if lines[cnt] != "":
@@ -336,7 +322,6 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
 result = [prev]
 result.extend(cut(post, must_break_at_empty_line))
 return result
 
 try:
 return cut(txt, must_break_at_empty_line=True)
 except RuntimeError:
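`breakdown_txt_to_satisfy_token_limit` recursively cuts the text at line boundaries until every piece fits the token budget. A simplified sketch of that recursion, using character count as a crude stand-in for the token function:

```python
def breakdown(txt, get_token_fn, limit):
    if get_token_fn(txt) <= limit:
        return [txt]
    lines = txt.split('\n')
    # estimate where the budget runs out, then walk backwards looking for a usable line break
    estimated = max(1, int(limit / get_token_fn(txt) * len(lines)))
    for cnt in reversed(range(1, estimated + 1)):
        prev, post = '\n'.join(lines[:cnt]), '\n'.join(lines[cnt:])
        if get_token_fn(prev) < limit:
            return [prev] + breakdown(post, get_token_fn, limit)
    raise RuntimeError("unable to find a cut point under the limit")

pieces = breakdown("line\n" * 50, get_token_fn=len, limit=40)   # character count as a crude token proxy
```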
@@ -352,10 +337,9 @@ def force_breakdown(txt, limit, get_token_fn):
 return txt[:i], txt[i:]
 return "Tiktoken未知错误", "Tiktoken未知错误"
 
 
 def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
 # 递归
 def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
 if get_token_fn(txt_tocut) <= limit:
 return [txt_tocut]
 else:
@@ -381,7 +365,6 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
 result = [prev]
 result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
 return result
 
 try:
 # 第1次尝试,将双空行(\n\n)作为切分点
 return cut(txt, must_break_at_empty_line=True)
@@ -392,7 +375,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
 except RuntimeError:
 try:
 # 第3次尝试,将英文句号(.)作为切分点
 res = cut(txt.replace('.', '。\n'), must_break_at_empty_line=False) # 这个中文的句号是故意的,作为一个标识而存在
 return [r.replace('。\n', '.') for r in res]
 except RuntimeError as e:
 try:
@@ -404,6 +387,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
 return cut(txt, must_break_at_empty_line=False, break_anyway=True)
 
 
 
 def read_and_clean_pdf_text(fp):
 """
 这个函数用于分割pdf,用了很多trick,逻辑较乱,效果奇好
@@ -431,9 +415,8 @@ def read_and_clean_pdf_text(fp):
 fc = 0 # Index 0 文本
 fs = 1 # Index 1 字体
 fb = 2 # Index 2 框框
 REMOVE_FOOT_NOTE = True # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
 REMOVE_FOOT_FFSIZE_PERCENT = 0.95 # 小于正文的?时,判定为不是正文(有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
 
 def primary_ffsize(l):
 """
 提取文本块主字体
@@ -443,12 +426,12 @@ def read_and_clean_pdf_text(fp):
 if wtf['size'] not in fsize_statiscs: fsize_statiscs[wtf['size']] = 0
 fsize_statiscs[wtf['size']] += len(wtf['text'])
 return max(fsize_statiscs, key=fsize_statiscs.get)
 
-def ffsize_same(a, b):
+def ffsize_same(a,b):
 """
 提取字体大小是否近似相等
 """
-return abs((a - b) / max(a, b)) < 0.02
+return abs((a-b)/max(a,b)) < 0.02
 
 with fitz.open(fp) as doc:
 meta_txt = []
@@ -468,19 +451,18 @@ def read_and_clean_pdf_text(fp):
 if len(txt_line) == 0: continue
 pf = primary_ffsize(l)
 meta_line.append([txt_line, pf, l['bbox'], l])
 for wtf in l['spans']: # for l in t['lines']:
 meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
 # meta_line.append(["NEW_BLOCK", pf])
-# 块元提取 for each word segment with in line for each line
-# cross-line words for each block
+# 块元提取 for each word segment with in line for each line cross-line words for each block
 meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
 '- ', '') for t in text_areas['blocks'] if 'lines' in t])
 meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
 for l in t['lines']]) for t in text_areas['blocks'] if 'lines' in t])
 if index == 0:
 page_one_meta = [" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
 '- ', '') for t in text_areas['blocks'] if 'lines' in t]
 
 ############################## <第 2 步,获取正文主字体> ##################################
 fsize_statiscs = {}
 for span in meta_span:
@@ -494,33 +476,32 @@ def read_and_clean_pdf_text(fp):
 mega_sec = []
 sec = []
 for index, line in enumerate(meta_line):
 if index == 0:
 sec.append(line[fc])
 continue
 if REMOVE_FOOT_NOTE:
 if meta_line[index][fs] <= give_up_fize_threshold:
 continue
-if ffsize_same(meta_line[index][fs], meta_line[index - 1][fs]):
+if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
 # 尝试识别段落
-if meta_line[index][fc].endswith('.') and \
-(meta_line[index - 1][fc] != 'NEW_BLOCK') and \
-(meta_line[index][fb][2] - meta_line[index][fb][0]) < (
-meta_line[index - 1][fb][2] - meta_line[index - 1][fb][0]) * 0.7:
+if meta_line[index][fc].endswith('.') and\
+(meta_line[index-1][fc] != 'NEW_BLOCK') and \
+(meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
 sec[-1] += line[fc]
 sec[-1] += "\n\n"
 else:
 sec[-1] += " "
 sec[-1] += line[fc]
 else:
-if (index + 1 < len(meta_line)) and \
+if (index+1 < len(meta_line)) and \
 meta_line[index][fs] > main_fsize:
 # 单行 + 字体大
 mega_sec.append(copy.deepcopy(sec))
 sec = []
 sec.append("# " + line[fc])
 else:
 # 尝试识别section
-if meta_line[index - 1][fs] > meta_line[index][fs]:
+if meta_line[index-1][fs] > meta_line[index][fs]:
 sec.append("\n" + line[fc])
 else:
 sec.append(line[fc])
@@ -539,15 +520,13 @@ def read_and_clean_pdf_text(fp):
 if len(block_txt) < 100:
 meta_txt[index] = '\n'
 return meta_txt
 
 meta_txt = 把字符太少的块清除为回车(meta_txt)
 
 def 清理多余的空行(meta_txt):
 for index in reversed(range(1, len(meta_txt))):
-if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n':
+if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
 meta_txt.pop(index)
 return meta_txt
 
 meta_txt = 清理多余的空行(meta_txt)
 
 def 合并小写开头的段落块(meta_txt):
@@ -558,18 +537,16 @@ def read_and_clean_pdf_text(fp):
 return True
 else:
 return False
 
 for _ in range(100):
 for index, block_txt in enumerate(meta_txt):
 if starts_with_lowercase_word(block_txt):
-if meta_txt[index - 1] != '\n':
-meta_txt[index - 1] += ' '
+if meta_txt[index-1] != '\n':
+meta_txt[index-1] += ' '
 else:
-meta_txt[index - 1] = ''
-meta_txt[index - 1] += meta_txt[index]
+meta_txt[index-1] = ''
+meta_txt[index-1] += meta_txt[index]
 meta_txt[index] = '\n'
 return meta_txt
 
 meta_txt = 合并小写开头的段落块(meta_txt)
 meta_txt = 清理多余的空行(meta_txt)
 
@@ -589,7 +566,7 @@ def read_and_clean_pdf_text(fp):
 return meta_txt, page_one_meta
 
 
 def get_files_from_everything(txt, type): # type='.md'
 """
 这个函数是用来获取指定目录下所有指定类型(如.md)的文件,并且对于网络上的文件,也可以获取它。
 下面是对每个参数和返回值的说明:
@@ -611,10 +588,9 @@ def get_files_from_everything(txt, type): # type='.md'
 from toolbox import get_conf
 proxies, = get_conf('proxies')
 r = requests.get(txt, proxies=proxies)
-with open('./gpt_log/temp' + type, 'wb+') as f:
-f.write(r.content)
+with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
 project_folder = './gpt_log/'
-file_manifest = ['./gpt_log/temp' + type]
+file_manifest = ['./gpt_log/temp'+type]
 elif txt.endswith(type):
 # 直接给定文件
 file_manifest = [txt]
@@ -622,7 +598,7 @@ def get_files_from_everything(txt, type): # type='.md'
 elif os.path.exists(txt):
 # 本地路径,递归搜索
 project_folder = txt
-file_manifest = [f for f in glob.glob(f'{project_folder}/**/*' + type, recursive=True)]
+file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
 if len(file_manifest) == 0:
 success = False
 else:
@@ -633,14 +609,16 @@ def get_files_from_everything(txt, type): # type='.md'
 return success, file_manifest, project_folder
 
 
 
 def Singleton(cls):
 _instance = {}
 
 def _singleton(*args, **kargs):
 if cls not in _instance:
 _instance[cls] = cls(*args, **kargs)
 return _instance[cls]
 
 return _singleton
 
 
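The `Singleton` decorator shown above caches one instance per decorated class. A short usage sketch; the `KnowledgeBase` class is only an illustration.

```python
def Singleton(cls):
    _instance = {}
    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)   # first call constructs, later calls reuse
        return _instance[cls]
    return _singleton

@Singleton
class KnowledgeBase:
    def __init__(self):
        self.loaded_files = []

assert KnowledgeBase() is KnowledgeBase()          # both calls return the same cached instance
```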
@@ -659,30 +637,31 @@ class knowledge_archive_interface():
 from toolbox import ProxyNetworkActivate
 print('Checking Text2vec ...')
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 with ProxyNetworkActivate(): # 临时地激活代理网络
 self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")
 
 return self.text2vec_large_chinese
 
 
 def feed_archive(self, file_manifest, id="default"):
 self.threadLock.acquire()
 # import uuid
 self.current_id = id
 from zh_langchain import construct_vector_store
 self.qa_handle, self.kai_path = construct_vector_store(
 vs_id=self.current_id,
 files=file_manifest,
 sentence_size=100,
 history=[],
 one_conent="",
 one_content_segmentation="",
-text2vec=self.get_chinese_text2vec(),
+text2vec = self.get_chinese_text2vec(),
 )
 self.threadLock.release()
 
 def get_current_archive_id(self):
 return self.current_id
 
 def get_loaded_file(self):
 return self.qa_handle.get_loaded_file()
 
@@ -691,31 +670,30 @@ class knowledge_archive_interface():
 if not self.current_id == id:
 self.current_id = id
 from zh_langchain import construct_vector_store
 self.qa_handle, self.kai_path = construct_vector_store(
 vs_id=self.current_id,
 files=[],
 sentence_size=100,
 history=[],
 one_conent="",
 one_content_segmentation="",
-text2vec=self.get_chinese_text2vec(),
+text2vec = self.get_chinese_text2vec(),
 )
 VECTOR_SEARCH_SCORE_THRESHOLD = 0
 VECTOR_SEARCH_TOP_K = 4
 CHUNK_SIZE = 512
 resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
-query=txt,
-vs_path=self.kai_path,
+query = txt,
+vs_path = self.kai_path,
 score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
 vector_search_top_k=VECTOR_SEARCH_TOP_K,
 chunk_conent=True,
 chunk_size=CHUNK_SIZE,
-text2vec=self.get_chinese_text2vec(),
+text2vec = self.get_chinese_text2vec(),
 )
 self.threadLock.release()
 return resp, prompt
 
 
 def try_install_deps(deps):
 for dep in deps:
 import subprocess, sys

@@ -203,7 +203,6 @@ def merge_tex_files_(project_foler, main_file, mode):
 c = fx.read()
 else:
 # e.g., \input{srcs/07_appendix}
-assert os.path.exists(fp+'.tex'), f'即找不到{fp},也找不到{fp}.tex,Tex源文件缺失!'
 with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
 c = fx.read()
 c = merge_tex_files_(project_foler, c, mode)

@@ -27,10 +27,8 @@ def gen_image(llm_kwargs, prompt, resolution="256x256"):
 }
 response = requests.post(url, headers=headers, json=data, proxies=proxies)
 print(response.content)
-try:
-image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
-except:
-raise RuntimeError(response.content.decode())
+image_url = json.loads(response.content.decode('utf8'))['data'][0]['url']
 # 文件保存到本地
 r = requests.get(image_url, proxies=proxies)
 file_path = 'gpt_log/image_gen/'
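The old side wrapped the URL extraction in try/except so that a non-JSON error body is surfaced as a readable failure instead of a KeyError. A sketch of that guard, assuming `response` is whatever `requests.post` returned above:

```python
import json

def extract_image_url(response):
    # The image endpoint normally answers {"data": [{"url": ...}]}; anything else is surfaced verbatim.
    try:
        return json.loads(response.content.decode('utf8'))['data'][0]['url']
    except Exception:
        raise RuntimeError(response.content.decode())
```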

@@ -1,5 +1,5 @@
 from toolbox import CatchException, report_execption, write_results_to_file
-from toolbox import update_ui, promote_file_to_downloadzone
+from toolbox import update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
 from .crazy_utils import read_and_clean_pdf_text
@@ -147,14 +147,23 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
 print('writing html result failed:', trimmed_format_exc())
 
 # 准备文件的下载
+import shutil
 for pdf_path in generated_conclusion_files:
 # 重命名文件
-rename_file = f'翻译-{os.path.basename(pdf_path)}'
-promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot)
+rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}'
+if os.path.exists(rename_file):
+os.remove(rename_file)
+shutil.copyfile(pdf_path, rename_file)
+if os.path.exists(pdf_path):
+os.remove(pdf_path)
 for html_path in generated_html_files:
 # 重命名文件
-rename_file = f'翻译-{os.path.basename(html_path)}'
-promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot)
+rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}'
+if os.path.exists(rename_file):
+os.remove(rename_file)
+shutil.copyfile(html_path, rename_file)
+if os.path.exists(html_path):
+os.remove(html_path)
 chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
 
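On the new side the `promote_file_to_downloadzone` helper is replaced by a manual copy into `./gpt_log/`. A condensed sketch of that replacement logic; the `move_to_download_zone` wrapper is illustrative.

```python
import os
import shutil

def move_to_download_zone(path, prefix='翻译-', zone='./gpt_log/'):
    # Copy the result into the download folder under a renamed file, then drop the original.
    rename_file = os.path.join(zone, prefix + os.path.basename(path))
    if os.path.exists(rename_file):
        os.remove(rename_file)
    shutil.copyfile(path, rename_file)
    if os.path.exists(path):
        os.remove(path)
    return rename_file
```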

@@ -13,11 +13,11 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
 web_port 当前软件运行的端口号
 """
 history = [] # 清空历史,以免输入溢出
-chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
+chatbot.append((txt, "正在同时咨询gpt-3.5和gpt-4……"))
 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
 
 # llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
-llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
+llm_kwargs['llm_model'] = 'gpt-3.5-turbo&gpt-4' # 支持任意数量的llm接口,用&符号分隔
 gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
 inputs=txt, inputs_show_user=txt,
 llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
@ -1,78 +1,67 @@
-from toolbox import CatchException, report_execption, write_results_to_file
 from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive

 fast_debug = False


 def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
-    import time
-    import os
+    import time, glob, os
     print('begin analysis on:', file_manifest)
     for index, fp in enumerate(file_manifest):
         with open(fp, 'r', encoding='utf-8', errors='replace') as f:
             file_content = f.read()

-        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
         i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
         i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

         if not fast_debug:
             msg = '正常'
             # ** gpt request **
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs,
-                                                                               chatbot, history=[],
-                                                                               sys_prompt=system_prompt) # 带超时倒计时
+            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt) # 带超时倒计时

             chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user);
-            history.append(gpt_say)
+            history.append(i_say_show_user); history.append(gpt_say)
             yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
             if not fast_debug: time.sleep(2)

     all_file = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(file_manifest)])
     i_say = f'根据以上你自己的分析,对全文进行概括,用学术性语言写一段中文摘要,然后再写一段英文摘要(包括{all_file})。'
     chatbot.append((i_say, "[Local Message] waiting gpt response."))
     yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

     if not fast_debug:
         msg = '正常'
         # ** gpt request **
-        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot,
-                                                                           history=history,
-                                                                           sys_prompt=system_prompt) # 带超时倒计时
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt) # 带超时倒计时

         chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say)
-        history.append(gpt_say)
+        history.append(i_say); history.append(gpt_say)
         yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
         res = write_results_to_file(history)
         chatbot.append(("完成了吗?", res))
         yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面



 @CatchException
-def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, system_prompt, web_port, history=None):
-    # history = [] # 清空历史,以免输入溢出
-    if history is None:
-        history = [] # 清空历史,以免输入溢出
-    import glob
-    import os
+def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    history = [] # 清空历史,以免输入溢出
+    import glob, os
     if os.path.exists(txt):
         project_folder = txt
     else:
-        if txt == "":
-            txt = '空空如也的输入栏'
-        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)] # + \
                     # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
                     # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
     if len(file_manifest) == 0:
-        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
         return
     yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
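Both 解析Paper and 读文章写摘要 are generator functions: every `yield from update_ui(...)` hands the current chatbot and history back to the front end so the page can refresh while the GPT request is still in flight. A minimal sketch of that contract, with a hypothetical `drive_plugin` helper standing in for the real Gradio wiring:

```python
# Hypothetical driver; the real UI loop lives in the Gradio app, not here.
def drive_plugin(plugin_generator):
    last_state = None
    for state in plugin_generator:   # each yield corresponds to one UI refresh
        last_state = state           # state ≈ (cookies, chatbot, history, msg)
    return last_state
```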
@ -104,7 +104,7 @@ def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
         meta_paper_info_list = meta_paper_info_list[batchsize:]

     chatbot.append(["状态?",
-                    "已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
+                    "已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write an academic \"Related Works\" section about \"你搜索的研究领域\" for me."])
     msg = '正常'
     yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
     res = write_results_to_file(history)
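The surrounding code walks meta_paper_info_list in slices of batchsize, summarizing one batch per GPT round before dropping it from the queue. A self-contained sketch of that batching idiom (the list contents and batchsize here are illustrative):

```python
meta_paper_info_list = [f"paper_{i}" for i in range(10)]    # 示例数据
batchsize = 5
round_no = 0
while len(meta_paper_info_list) > 0:
    batch = meta_paper_info_list[:batchsize]                 # 本轮交给GPT的一批文献
    meta_paper_info_list = meta_paper_info_list[batchsize:]  # 从队列中移除已处理的一批
    round_no += 1
    print(f"第{round_no}轮处理 {len(batch)} 篇")
```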
@ -1,28 +0,0 @@
-# encoding: utf-8
-# @Time : 2023/4/19
-# @Author : Spike
-# @Descr :
-from toolbox import update_ui
-from toolbox import CatchException, report_execption, write_results_to_file
-from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-
-
-@CatchException
-def 猜你想问(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    if txt:
-        show_say = txt
-        prompt = txt+'\n回答完问题后,再列出用户可能提出的三个问题。'
-    else:
-        prompt = history[-1]+"\n分析上述回答,再列出用户可能提出的三个问题。"
-        show_say = '分析上述回答,再列出用户可能提出的三个问题。'
-    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
-        inputs=prompt,
-        inputs_show_user=show_say,
-        llm_kwargs=llm_kwargs,
-        chatbot=chatbot,
-        history=history,
-        sys_prompt=system_prompt
-    )
-    chatbot[-1] = (show_say, gpt_say)
-    history.extend([show_say, gpt_say])
-    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
Binary file not shown.
@ -28,7 +28,6 @@ proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
 timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                   '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'

-
 def get_full_error(chunk, stream_response):
     """
         获取完整的从Openai返回的报错
@ -41,9 +40,7 @@ def get_full_error(chunk, stream_response):
             return chunk


-def predict_no_ui_long_connection(
-    inputs, llm_kwargs, history=None, sys_prompt="", observe_window=None, console_slience=False
-):
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
     """
     发送至chatGPT,等待回复,一次性完成,不显示中间过程。但内部用stream的方法避免中途网线被掐。
     inputs:
@ -57,59 +54,45 @@ def predict_no_ui_long_connection(
     observe_window = None:
         用于负责跨越线程传递已经输出的部分,大部分时候仅仅为了fancy的视觉效果,留空即可。observe_window[0]:观测窗。observe_window[1]:看门狗
     """
-    if history is None:
-        history = []
     watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
     headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
     retry = 0
-    from bridge_all import model_info
     while True:
         try:
             # make a POST request to the API endpoint, stream=False
+            from .bridge_all import model_info
             endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
             response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
-            stream_response = response.iter_lines()
-            break
-        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
+                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
+        except requests.exceptions.ReadTimeout as e:
             retry += 1
             traceback.print_exc()
-            if retry > MAX_RETRY:
-                raise TimeoutError
-            if MAX_RETRY != 0:
-                print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
-        except Exception as e:
-            print(f"出现异常:{e}")
-            raise e
+            if retry > MAX_RETRY: raise TimeoutError
+            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

+    stream_response = response.iter_lines()
     result = ''
     while True:
-        try:
-            chunk = next(stream_response).decode()
+        try: chunk = next(stream_response).decode()
         except StopIteration:
             break
-        # except requests.exceptions.ConnectionError:
-        #     chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
-        if len(chunk) == 0:
-            continue
+        except requests.exceptions.ConnectionError:
+            chunk = next(stream_response).decode() # 失败了,重试一次?再失败就没办法了。
+        if len(chunk)==0: continue
         if not chunk.startswith('data:'):
             error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
             if "reduce the length" in error_msg:
                 raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
             else:
                 raise RuntimeError("OpenAI拒绝了请求:" + error_msg)
-        if 'data: [DONE]' in chunk:
-            break # api2d 正常完成
+        if ('data: [DONE]' in chunk): break # api2d 正常完成
         json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
         delta = json_data["delta"]
-        if len(delta) == 0:
-            break
-        if "role" in delta:
-            continue
+        if len(delta) == 0: break
+        if "role" in delta: continue
         if "content" in delta:
             result += delta["content"]
-            if not console_slience:
-                print(delta["content"], end='')
+            if not console_slience: print(delta["content"], end='')
             if observe_window is not None:
                 # 观测窗,把已经获取的数据显示出去
                 if len(observe_window) >= 1: observe_window[0] += delta["content"]
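predict_no_ui_long_connection now takes the whole argument list on one line and defaults history to []. A hedged usage sketch follows (the llm_kwargs fields and the API key are illustrative placeholders, not values taken from the repository); the caller keeps observe_window[1] fresh from another thread, otherwise the watchdog raises after watch_dog_patience seconds:

```python
import time
# from request_llm.bridge_chatgpt import predict_no_ui_long_connection

llm_kwargs = {'llm_model': 'gpt-3.5-turbo', 'api_key': 'sk-...', 'temperature': 1.0,
              'top_p': 1.0, 'max_length': 4096}             # 字段仅作示意
observe_window = ["", time.time()]   # [0] 流式累积的文本, [1] 看门狗时间戳

# result = predict_no_ui_long_connection(
#     inputs="用一句话介绍Transformer",
#     llm_kwargs=llm_kwargs, history=[], sys_prompt="You are a helpful assistant.",
#     observe_window=observe_window)
```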
@ -117,8 +100,7 @@ def predict_no_ui_long_connection(
                 if len(observe_window) >= 2:
                     if (time.time()-observe_window[1]) > watch_dog_patience:
                         raise RuntimeError("用户取消了程序。")
-        else:
-            raise RuntimeError("意外Json结构:"+delta)
+        else: raise RuntimeError("意外Json结构:"+delta)
         if json_data['finish_reason'] == 'length':
             raise ConnectionAbortedError("正常结束,但显示Token不足,导致输出不完整,请削减单次输入的文本量。")
     return result
@ -246,7 +228,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                 yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
                 return

-
 def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     """
     整合所有信息,选择LLM模型,生成http请求,为发送请求做准备
@ -266,19 +247,23 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     messages = [{"role": "system", "content": system_prompt}]
     if conversation_cnt:
         for index in range(0, 2*conversation_cnt, 2):
-            what_i_have_asked = {"role": "user", "content": history[index]}
-            what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
+            what_i_have_asked = {}
+            what_i_have_asked["role"] = "user"
+            what_i_have_asked["content"] = history[index]
+            what_gpt_answer = {}
+            what_gpt_answer["role"] = "assistant"
+            what_gpt_answer["content"] = history[index+1]
             if what_i_have_asked["content"] != "":
-                if what_gpt_answer["content"] == "":
-                    continue
-                if what_gpt_answer["content"] == timeout_bot_msg:
-                    continue
+                if what_gpt_answer["content"] == "": continue
+                if what_gpt_answer["content"] == timeout_bot_msg: continue
                 messages.append(what_i_have_asked)
                 messages.append(what_gpt_answer)
             else:
                 messages[-1]['content'] = what_gpt_answer['content']

-    what_i_ask_now = {"role": "user", "content": inputs}
+    what_i_ask_now = {}
+    what_i_ask_now["role"] = "user"
+    what_i_ask_now["content"] = inputs
     messages.append(what_i_ask_now)

     payload = {
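The refactor above only changes how the per-turn dicts are built; the message list that generate_payload hands to the OpenAI-style endpoint keeps the same shape. A simplified, illustrative reconstruction of that shape (the real function additionally skips empty turns and turns that ended in the timeout message):

```python
system_prompt = "Serve me as a writing and programming assistant."   # 示例值
history = ["第一轮提问", "第一轮回答"]                                 # user/assistant 交替
inputs = "这一轮的新提问"

messages = [{"role": "system", "content": system_prompt}]
for i in range(0, len(history), 2):
    messages.append({"role": "user", "content": history[i]})
    messages.append({"role": "assistant", "content": history[i + 1]})
messages.append({"role": "user", "content": inputs})
# messages: system → 历史的 user/assistant 成对消息 → 最新的 user 消息
```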
@ -293,8 +278,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     }
     try:
         print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
-    except Exception as e:
-        print(f'输入中可能存在乱码。抛出异常: {e}')
-    return headers, payload
+    except:
+        print('输入中可能存在乱码。')
+    return headers,payload

@ -1,4 +1,3 @@
-gradio>=3.33.1
 tiktoken>=0.3.3
 requests[socks]
 transformers
@ -16,5 +15,3 @@ openai
 numpy
 arxiv
 rich
-langchain
-zh_langchain
39
toolbox.py
@ -21,7 +21,6 @@ pj = os.path.join
 ========================================================================
 """

-
 class ChatBotWithCookies(list):
     def __init__(self, cookie):
         self._cookies = cookie
@ -72,13 +71,11 @@ def update_ui(chatbot, history, msg='正常', **kwargs): # 刷新界面
     assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时,可用clear将其清空,然后用for+append循环重新赋值。"
     yield chatbot.get_cookies(), chatbot, history, msg

-
 def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面
     """
         刷新用户界面
     """
-    if len(chatbot) == 0:
-        chatbot.append(["update_ui_last_msg", lastmsg])
+    if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
     chatbot[-1] = list(chatbot[-1])
     chatbot[-1][-1] = lastmsg
     yield from update_ui(chatbot=chatbot, history=history)
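update_ui_lastest_msg collapses the empty-chatbot guard onto one line but keeps the behavior: overwrite the last chatbot entry, then refresh. Stripped of the generator plumbing, the helper amounts to roughly this (simplified, illustrative only):

```python
def set_last_message(chatbot, lastmsg):
    # 与 update_ui_lastest_msg 的核心逻辑等价(不含界面刷新部分)
    if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
    chatbot[-1] = list(chatbot[-1])
    chatbot[-1][-1] = lastmsg
    return chatbot

print(set_last_message([["问题", "旧回答"]], "新回答"))   # [['问题', '新回答']]
```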
@ -86,25 +83,24 @@ def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1): # 刷新界面


 def trimmed_format_exc():
-    import os
-    import traceback
-    _str = traceback.format_exc()
+    import os, traceback
+    str = traceback.format_exc()
     current_path = os.getcwd()
     replace_path = "."
-    return _str.replace(current_path, replace_path)
+    return str.replace(current_path, replace_path)


 def CatchException(f):
     """
     装饰器函数,捕捉函数f中的异常并封装到一个生成器中返回,并显示到聊天当中。
     """

     @wraps(f)
     def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
         try:
             yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
         except Exception as e:
             from check_proxy import check_proxy
-            # from toolbox import get_conf # 不需要导入本文件内容
+            from toolbox import get_conf
             proxies, = get_conf('proxies')
             tb_str = '```\n' + trimmed_format_exc() + '```'
             if len(chatbot) == 0:
@ -112,7 +108,7 @@ def CatchException(f):
                 chatbot.append(["插件调度异常", "异常原因"])
             chatbot[-1] = (chatbot[-1][0],
                            f"[Local Message] 实验性函数调用出错: \n\n{tb_str} \n\n当前代理可用性: \n\n{check_proxy(proxies)}")
             yield from update_ui(chatbot=chatbot, history=history, msg=f'异常 {e}') # 刷新界面
     return decorated

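CatchException keeps the decorated plugin a generator; when the wrapped function raises, the decorator appends the trimmed traceback (plus a proxy availability check) to the chatbot and yields one last UI refresh instead of letting the exception kill the session. A hedged illustration with a made-up plugin (not one from crazy_functions/; arguments are passed positionally by the wrapper):

```python
from toolbox import CatchException, update_ui

@CatchException
def 演示插件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    chatbot.append((txt, "开始处理……"))
    yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
    raise RuntimeError("演示异常")  # 将被CatchException捕获并连同traceback显示在聊天窗口中
```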
@ -152,7 +148,6 @@ def HotReload(f):
 ========================================================================
 """

-
 def get_reduce_token_percent(text):
     """
         * 此函数未来将被弃用
@ -212,6 +207,8 @@ def regular_txt_to_markdown(text):
     return text


+
+
 def report_execption(chatbot, history, a, b):
     """
     向chatbot中添加错误信息
@ -241,7 +238,6 @@ def text_divide_paragraph(text):
         text = "</br>".join(lines)
         return pre + text + suf

-
 @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
 def markdown_convertion(txt):
     """
@ -444,7 +440,6 @@ def find_recent_files(directory):

     return recent_files

-
 def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     # 将文件复制一份到下载区
     import shutil
@ -457,7 +452,6 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     else: current = []
     chatbot._cookies.update({'file_to_promote': [new_path] + current})

-
 def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
     """
     当文件被上传时的回调函数
@ -511,20 +505,17 @@ def on_report_generated(cookies, files, chatbot):
         chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
     return cookies, report_files, chatbot

-
 def is_openai_api_key(key):
     API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
     API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key)
     return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE)

-
 def is_api2d_key(key):
     if key.startswith('fk') and len(key) == 41:
         return True
     else:
         return False

-
 def is_any_api_key(key):
     if ',' in key:
         keys = key.split(',')
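The three key checkers are pure format tests, so they are easy to exercise directly; the comma-joined form mirrors how several keys can be supplied at once, as is_any_api_key's split(',') shows. A quick sketch with obviously fake placeholder keys:

```python
from toolbox import is_openai_api_key, is_api2d_key, is_any_api_key

fake_openai = "sk-" + "a" * 48        # 48位字符的OpenAI风格密钥(纯占位)
fake_api2d  = "fk" + "b" * 39         # 'fk'开头、总长41位的API2D风格密钥(纯占位)

print(is_openai_api_key(fake_openai))                   # True
print(is_api2d_key(fake_api2d))                         # True
print(is_any_api_key(fake_openai + "," + fake_api2d))   # True,逗号分隔的多个密钥
```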
@ -534,7 +525,6 @@ def is_any_api_key(key):
     else:
         return is_openai_api_key(key) or is_api2d_key(key)

-
 def what_keys(keys):
     avail_key_list = {'OpenAI Key':0, "API2D Key":0}
     key_list = keys.split(',')
@ -549,7 +539,6 @@ def what_keys(keys):

     return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']} 个,API2D Key {avail_key_list['API2D Key']} 个"

-
 def select_api_key(keys, llm_model):
     import random
     avail_key_list = []
@ -569,7 +558,6 @@ def select_api_key(keys, llm_model):
     api_key = random.choice(avail_key_list) # 随机负载均衡
     return api_key

-
 def read_env_variable(arg, default_value):
     """
     环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG`
@ -624,7 +612,6 @@ def read_env_variable(arg, default_value):
     print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
     return r

-
 @lru_cache(maxsize=128)
 def read_single_conf_with_lru_cache(arg):
     from colorful import print亮红, print亮绿, print亮蓝
@ -689,7 +676,6 @@ class DummyWith():
     def __exit__(self, exc_type, exc_value, traceback):
         return

-
 def run_gradio_in_subpath(demo, auth, port, custom_path):
     """
     把gradio的运行地址更改到指定的二次路径上
@ -784,7 +770,6 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
 ========================================================================
 """

-
 def zip_folder(source_folder, dest_folder, zip_name):
     import zipfile
     import os
@ -816,7 +801,6 @@ def zip_folder(source_folder, dest_folder, zip_name):

     print(f"Zip file created at {zip_file}")

-
 def zip_result(folder):
     import time
     t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
@ -827,7 +811,6 @@ def gen_time_str():
     import time
     return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())

-
 class ProxyNetworkActivate():
     """
     这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@ -847,18 +830,16 @@ class ProxyNetworkActivate():
         if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
         return

-
 def objdump(obj, file='objdump.tmp'):
     import pickle
     with open(file, 'wb+') as f:
         pickle.dump(obj, f)
     return

-
 def objload(file='objdump.tmp'):
     import pickle, os
     if not os.path.exists(file):
         return
     with open(file, 'rb') as f:
         return pickle.load(f)

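ProxyNetworkActivate is a small context manager: it is meant to export the configured proxy into the HTTP_PROXY/HTTPS_PROXY environment variables on entry, and the `__exit__` shown above pops them again so only the wrapped block goes through the proxy. A hedged usage sketch (the URL is illustrative):

```python
import requests
from toolbox import ProxyNetworkActivate

with ProxyNetworkActivate():            # 进入时挂上代理, 退出时清掉环境变量
    r = requests.get("https://example.org", timeout=10)   # 示例URL
print(r.status_code)
```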