Compare commits
166 Commits
huggingfac
...
version3.4
| Author | SHA1 | Date | |
|---|---|---|---|
| 49253c4dc6 | |||
| 1a00093015 | |||
| 64f76e7401 | |||
| eb4c07997e | |||
| d684b4cdb3 | |||
| 601a95c948 | |||
| e18bef2e9c | |||
| f654c1af31 | |||
| e90048a671 | |||
| ea624b1510 | |||
| 057e3dda3c | |||
| 4290821a50 | |||
| 280e14d7b7 | |||
| 9f0cf9fb2b | |||
| b8560b7510 | |||
| d841d13b04 | |||
| efda9e5193 | |||
| 33d2e75aac | |||
| 74941170aa | |||
| cd38949903 | |||
| d87f1eb171 | |||
| cd1e4e1ba7 | |||
| cf5f348d70 | |||
| 0ee25f475e | |||
| 1fede6df7f | |||
| 22a65cd163 | |||
| 538b041ea3 | |||
| d7b056576d | |||
| cb0bb6ab4a | |||
| bf955aaf12 | |||
| 61eb0da861 | |||
| 5da633d94d | |||
| f3e4e26e2f | |||
| af7734dd35 | |||
| d5bab093f9 | |||
| f94b167dc2 | |||
| 951d5ec758 | |||
| 016d8ee156 | |||
| dca9ec4bae | |||
| a06e43c96b | |||
| 29c6bfb6cb | |||
| 8d7ee975a0 | |||
| 4bafbb3562 | |||
| 7fdf0a8e51 | |||
| 2bb13b4677 | |||
| 9a5a509dd9 | |||
| cbcb98ef6a | |||
| bb864c6313 | |||
| 6d849eeb12 | |||
| ef752838b0 | |||
| 73d4a1ff4b | |||
| 8c62f21aa6 | |||
| c40ebfc21f | |||
| c365ea9f57 | |||
| 12d66777cc | |||
| 9ac3d0d65d | |||
| 9fd212652e | |||
| 790a1cf12a | |||
| 3ecf2977a8 | |||
| aeddf6b461 | |||
| ce0d8b9dab | |||
| 3c00e7a143 | |||
| ef1bfdd60f | |||
| e48d92e82e | |||
| 110510997f | |||
| b52695845e | |||
| f30c9c6d3b | |||
| ff5403eac6 | |||
| f9226d92be | |||
| a0ea5d0e9e | |||
| ce6f11d200 | |||
| 10b3001dba | |||
| e2de1d76ea | |||
| 77cc141a82 | |||
| 526b4d8ecd | |||
| 149db621ec | |||
| 2e1bb7311c | |||
| dae65fd2c2 | |||
| 9aafb2ee47 | |||
| 6bc91bd02e | |||
| 8ef7344101 | |||
| 40da1b0afe | |||
| c65def90f3 | |||
| ddeaf76422 | |||
| f23b66dec2 | |||
| a26b294817 | |||
| 66018840da | |||
| cea2144f34 | |||
| 7f5be93c1d | |||
| 85b838b302 | |||
| 27f97ba92a | |||
| 14269eba98 | |||
| d5c9bc9f0a | |||
| b0fed3edfc | |||
| 7296d054a2 | |||
| d57c7d352d | |||
| 3fd2927ea3 | |||
| b745074160 | |||
| 70ee810133 | |||
| 68fea9e79b | |||
| f82bf91aa8 | |||
| dde9edcc0c | |||
| 66c78e459e | |||
| de54102303 | |||
| 7c7d2d8a84 | |||
| 834f989ed4 | |||
| b658ee6e04 | |||
| 1a60280ea0 | |||
| 991cb7d272 | |||
| 463991cfb2 | |||
| 06f10b5fdc | |||
| d275d012c6 | |||
| c5d1ea3e21 | |||
| 0022b92404 | |||
| ef61221241 | |||
| 5a1831db98 | |||
| a643f8b0db | |||
| 601712fd0a | |||
| e769f831c7 | |||
| dcd952671f | |||
| 06564df038 | |||
| 2f037f30d5 | |||
| efedab186d | |||
| f49cae5116 | |||
| 2b620ccf2e | |||
| a1b7a4da56 | |||
| 61b0e49fed | |||
| f60dc371db | |||
| 0a3433b8ac | |||
| 31bce54abb | |||
| 5db1530717 | |||
| c32929fd11 | |||
| 3e4c2b056c | |||
| e79e9d7d23 | |||
| d175b93072 | |||
| ed254687d2 | |||
| c0392f7074 | |||
| f437712af7 | |||
| 6d1ea643e9 | |||
| 9e84cfcd46 | |||
| 897695d29f | |||
| 1dcc2873d2 | |||
| 42cf738a31 | |||
| e4646789af | |||
| e6c3aabd45 | |||
| 6789d1fab4 | |||
| 7a733f00a2 | |||
| dd55888f0e | |||
| 0327df22eb | |||
| e544f5e9d0 | |||
| 0fad4f44a4 | |||
| 1240dd6f26 | |||
| d6be947177 | |||
| 3cfbdce9f2 | |||
| 1ee471ff57 | |||
| 25ccecf8e3 | |||
| 9e991bfa3e | |||
| 221efd0193 | |||
| 976b9bf65f | |||
| ae5783e383 | |||
| 30224af042 | |||
| 8ff7c15cd8 | |||
| f3205994ea | |||
| ec8cc48a4d | |||
| 5d75c578b9 | |||
| cd411c2eea |
12
README.md
12
README.md
@ -1,15 +1,3 @@
|
|||||||
---
|
|
||||||
title: ChatImprovement
|
|
||||||
emoji: 😻
|
|
||||||
colorFrom: blue
|
|
||||||
colorTo: blue
|
|
||||||
sdk: gradio
|
|
||||||
sdk_version: 3.32.0
|
|
||||||
app_file: app.py
|
|
||||||
pinned: false
|
|
||||||
---
|
|
||||||
|
|
||||||
# ChatGPT 学术优化
|
|
||||||
> **Note**
|
> **Note**
|
||||||
>
|
>
|
||||||
> 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**:
|
> 2023.5.27 对Gradio依赖进行了调整,Fork并解决了官方Gradio的若干Bugs。请及时**更新代码**并重新更新pip依赖。安装依赖时,请严格选择`requirements.txt`中**指定的版本**:
|
||||||
|
|||||||
@ -45,9 +45,10 @@ WEB_PORT = -1
|
|||||||
# 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
|
# 如果OpenAI不响应(网络卡顿、代理失败、KEY失效),重试的次数限制
|
||||||
MAX_RETRY = 2
|
MAX_RETRY = 2
|
||||||
|
|
||||||
# OpenAI模型选择是(gpt4现在只对申请成功的人开放)
|
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
|
||||||
LLM_MODEL = "gpt-3.5-turbo" # 可选 "chatglm"
|
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
|
||||||
AVAIL_LLM_MODELS = ["newbing-free", "gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "api2d-gpt-3.5-turbo"]
|
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
|
||||||
|
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
|
||||||
|
|
||||||
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
|
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
|
||||||
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
|
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
|
||||||
|
|||||||
@ -226,20 +226,12 @@ def get_crazy_functions():
|
|||||||
try:
|
try:
|
||||||
from crazy_functions.联网的ChatGPT import 连接网络回答问题
|
from crazy_functions.联网的ChatGPT import 连接网络回答问题
|
||||||
function_plugins.update({
|
function_plugins.update({
|
||||||
"连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
|
"连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": {
|
||||||
"Color": "stop",
|
"Color": "stop",
|
||||||
"AsButton": False, # 加入下拉菜单中
|
"AsButton": False, # 加入下拉菜单中
|
||||||
"Function": HotReload(连接网络回答问题)
|
"Function": HotReload(连接网络回答问题)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
|
|
||||||
function_plugins.update({
|
|
||||||
"连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
|
|
||||||
"Color": "stop",
|
|
||||||
"AsButton": False, # 加入下拉菜单中
|
|
||||||
"Function": HotReload(连接bing搜索回答问题)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
except:
|
except:
|
||||||
print('Load function plugin failed')
|
print('Load function plugin failed')
|
||||||
|
|
||||||
|
|||||||
@ -27,24 +27,6 @@ def set_forbidden_text(text, mask, pattern, flags=0):
|
|||||||
mask[res.span()[0]:res.span()[1]] = PRESERVE
|
mask[res.span()[0]:res.span()[1]] = PRESERVE
|
||||||
return text, mask
|
return text, mask
|
||||||
|
|
||||||
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
|
|
||||||
"""
|
|
||||||
Move area out of preserve area (make text editable for GPT)
|
|
||||||
count the number of the braces so as to catch compelete text area.
|
|
||||||
e.g.
|
|
||||||
\begin{abstract} blablablablablabla. \end{abstract}
|
|
||||||
"""
|
|
||||||
if isinstance(pattern, list): pattern = '|'.join(pattern)
|
|
||||||
pattern_compile = re.compile(pattern, flags)
|
|
||||||
for res in pattern_compile.finditer(text):
|
|
||||||
if not forbid_wrapper:
|
|
||||||
mask[res.span()[0]:res.span()[1]] = TRANSFORM
|
|
||||||
else:
|
|
||||||
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
|
|
||||||
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
|
|
||||||
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
|
|
||||||
return text, mask
|
|
||||||
|
|
||||||
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
|
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
|
||||||
"""
|
"""
|
||||||
Add a preserve text area in this paper (text become untouchable for GPT).
|
Add a preserve text area in this paper (text become untouchable for GPT).
|
||||||
@ -344,7 +326,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
|
|||||||
# reverse 操作必须放在最后
|
# reverse 操作必须放在最后
|
||||||
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
|
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
|
||||||
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
|
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
|
||||||
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
|
|
||||||
root = convert_to_linklist(text, mask)
|
root = convert_to_linklist(text, mask)
|
||||||
|
|
||||||
# 修复括号
|
# 修复括号
|
||||||
@ -691,9 +672,10 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
|
|||||||
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
|
||||||
return False, -1, [-1]
|
return False, -1, [-1]
|
||||||
|
|
||||||
def compile_latex_with_timeout(command, cwd, timeout=60):
|
|
||||||
|
def compile_latex_with_timeout(command, timeout=60):
|
||||||
import subprocess
|
import subprocess
|
||||||
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
|
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
try:
|
try:
|
||||||
stdout, stderr = process.communicate(timeout=timeout)
|
stdout, stderr = process.communicate(timeout=timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
@ -717,24 +699,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||||||
|
|
||||||
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
|
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
|
||||||
|
|
||||||
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
|
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
|
||||||
# 只有第二步成功,才能继续下面的步骤
|
# 只有第二步成功,才能继续下面的步骤
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
|
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
|
||||||
ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
|
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir)
|
||||||
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
|
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
|
||||||
ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
|
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir)
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
|
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
|
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
|
||||||
|
|
||||||
if mode!='translate_zh':
|
if mode!='translate_zh':
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
@ -742,11 +724,13 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||||||
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
|
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
|
||||||
|
|
||||||
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
|
os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
|
||||||
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
|
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
|
||||||
|
|
||||||
|
# <--------------------->
|
||||||
|
os.chdir(current_dir)
|
||||||
|
|
||||||
# <---------- 检查结果 ----------->
|
# <---------- 检查结果 ----------->
|
||||||
results_ = ""
|
results_ = ""
|
||||||
@ -782,6 +766,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
|
|||||||
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
|
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
|
||||||
if not can_retry: break
|
if not can_retry: break
|
||||||
|
|
||||||
|
os.chdir(current_dir)
|
||||||
return False # 失败啦
|
return False # 失败啦
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -1,102 +0,0 @@
|
|||||||
from toolbox import CatchException, update_ui
|
|
||||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from request_llm.bridge_all import model_info
|
|
||||||
|
|
||||||
|
|
||||||
def bing_search(query, proxies=None):
|
|
||||||
query = query
|
|
||||||
url = f"https://cn.bing.com/search?q={query}"
|
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
|
|
||||||
response = requests.get(url, headers=headers, proxies=proxies)
|
|
||||||
soup = BeautifulSoup(response.content, 'html.parser')
|
|
||||||
results = []
|
|
||||||
for g in soup.find_all('li', class_='b_algo'):
|
|
||||||
anchors = g.find_all('a')
|
|
||||||
if anchors:
|
|
||||||
link = anchors[0]['href']
|
|
||||||
if not link.startswith('http'):
|
|
||||||
continue
|
|
||||||
title = g.find('h2').text
|
|
||||||
item = {'title': title, 'link': link}
|
|
||||||
results.append(item)
|
|
||||||
|
|
||||||
for r in results:
|
|
||||||
print(r['link'])
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
def scrape_text(url, proxies) -> str:
|
|
||||||
"""Scrape text from a webpage
|
|
||||||
|
|
||||||
Args:
|
|
||||||
url (str): The URL to scrape text from
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The scraped text
|
|
||||||
"""
|
|
||||||
headers = {
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
|
|
||||||
'Content-Type': 'text/plain',
|
|
||||||
}
|
|
||||||
try:
|
|
||||||
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
|
|
||||||
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
|
|
||||||
except:
|
|
||||||
return "无法连接到该网页"
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
|
||||||
for script in soup(["script", "style"]):
|
|
||||||
script.extract()
|
|
||||||
text = soup.get_text()
|
|
||||||
lines = (line.strip() for line in text.splitlines())
|
|
||||||
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
|
|
||||||
text = "\n".join(chunk for chunk in chunks if chunk)
|
|
||||||
return text
|
|
||||||
|
|
||||||
@CatchException
|
|
||||||
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
|
||||||
"""
|
|
||||||
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
|
|
||||||
llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
|
|
||||||
plugin_kwargs 插件模型的参数,暂时没有用武之地
|
|
||||||
chatbot 聊天显示框的句柄,用于显示给用户
|
|
||||||
history 聊天历史,前情提要
|
|
||||||
system_prompt 给gpt的静默提醒
|
|
||||||
web_port 当前软件运行的端口号
|
|
||||||
"""
|
|
||||||
history = [] # 清空历史,以免输入溢出
|
|
||||||
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
|
|
||||||
"[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
|
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
|
||||||
|
|
||||||
# ------------- < 第1步:爬取搜索引擎的结果 > -------------
|
|
||||||
from toolbox import get_conf
|
|
||||||
proxies, = get_conf('proxies')
|
|
||||||
urls = bing_search(txt, proxies)
|
|
||||||
history = []
|
|
||||||
|
|
||||||
# ------------- < 第2步:依次访问网页 > -------------
|
|
||||||
max_search_result = 8 # 最多收纳多少个网页的结果
|
|
||||||
for index, url in enumerate(urls[:max_search_result]):
|
|
||||||
res = scrape_text(url['link'], proxies)
|
|
||||||
history.extend([f"第{index}份搜索结果:", res])
|
|
||||||
chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
|
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
|
||||||
|
|
||||||
# ------------- < 第3步:ChatGPT综合 > -------------
|
|
||||||
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
|
|
||||||
i_say, history = input_clipping( # 裁剪输入,从最长的条目开始裁剪,防止爆token
|
|
||||||
inputs=i_say,
|
|
||||||
history=history,
|
|
||||||
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
|
|
||||||
)
|
|
||||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
|
||||||
inputs=i_say, inputs_show_user=i_say,
|
|
||||||
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
|
|
||||||
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
|
|
||||||
)
|
|
||||||
chatbot[-1] = (i_say, gpt_say)
|
|
||||||
history.append(i_say);history.append(gpt_say)
|
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
|
|
||||||
|
|
||||||
@ -13,11 +13,11 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
|
|||||||
web_port 当前软件运行的端口号
|
web_port 当前软件运行的端口号
|
||||||
"""
|
"""
|
||||||
history = [] # 清空历史,以免输入溢出
|
history = [] # 清空历史,以免输入溢出
|
||||||
chatbot.append((txt, "正在同时咨询gpt-3.5和gpt-4……"))
|
chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
|
||||||
|
|
||||||
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
|
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
|
||||||
llm_kwargs['llm_model'] = 'gpt-3.5-turbo&gpt-4' # 支持任意数量的llm接口,用&符号分隔
|
llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
|
||||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||||
inputs=txt, inputs_show_user=txt,
|
inputs=txt, inputs_show_user=txt,
|
||||||
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
|
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
|
||||||
|
|||||||
@ -104,7 +104,7 @@ def 谷歌检索小助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
|
|||||||
meta_paper_info_list = meta_paper_info_list[batchsize:]
|
meta_paper_info_list = meta_paper_info_list[batchsize:]
|
||||||
|
|
||||||
chatbot.append(["状态?",
|
chatbot.append(["状态?",
|
||||||
"已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write an academic \"Related Works\" section about \"你搜索的研究领域\" for me."])
|
"已经全部完成,您可以试试让AI写一个Related Works,例如您可以继续输入Write a \"Related Works\" section about \"你搜索的研究领域\" for me."])
|
||||||
msg = '正常'
|
msg = '正常'
|
||||||
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
|
yield from update_ui(chatbot=chatbot, history=history, msg=msg) # 刷新界面
|
||||||
res = write_results_to_file(history)
|
res = write_results_to_file(history)
|
||||||
|
|||||||
@ -1,7 +1,6 @@
|
|||||||
from toolbox import CatchException, update_ui
|
from toolbox import CatchException, update_ui
|
||||||
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
|
||||||
import datetime, re
|
import datetime
|
||||||
|
|
||||||
@CatchException
|
@CatchException
|
||||||
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
|
||||||
"""
|
"""
|
||||||
@ -19,34 +18,12 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
|
|||||||
for i in range(5):
|
for i in range(5):
|
||||||
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
|
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
|
||||||
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
|
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
|
||||||
i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式:{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。'
|
i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
|
||||||
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
|
||||||
inputs=i_say, inputs_show_user=i_say,
|
inputs=i_say, inputs_show_user=i_say,
|
||||||
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
|
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
|
||||||
sys_prompt='输出格式示例:1908年,美国消防救援事业发展的“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。'
|
sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
|
||||||
)
|
)
|
||||||
gpt_say = get_images(gpt_say)
|
|
||||||
chatbot[-1] = (i_say, gpt_say)
|
chatbot[-1] = (i_say, gpt_say)
|
||||||
history.append(i_say);history.append(gpt_say)
|
history.append(i_say);history.append(gpt_say)
|
||||||
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
|
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
|
||||||
|
|
||||||
|
|
||||||
def get_images(gpt_say):
|
|
||||||
def get_image_by_keyword(keyword):
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
|
|
||||||
for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
|
|
||||||
if "data-src" in image_element: break
|
|
||||||
return image_element["data-src"]
|
|
||||||
|
|
||||||
for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say):
|
|
||||||
keywords = [n.strip('"') for n in keywords.split(',')]
|
|
||||||
try:
|
|
||||||
description = keywords[0]
|
|
||||||
url = get_image_by_keyword(keywords[0])
|
|
||||||
img_tag = f"\n\n"
|
|
||||||
gpt_say += img_tag
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
return gpt_say
|
|
||||||
BIN
docs/gradio-3.32.2-py3-none-any.whl
Normal file
BIN
docs/gradio-3.32.2-py3-none-any.whl
Normal file
Binary file not shown.
@ -1,10 +1,8 @@
|
|||||||
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
import subprocess, sys
|
|
||||||
subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gradio-stable-fork'])
|
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
if gr.__version__ not in ['3.28.3','3.32.3']: assert False, "请用 pip install -r requirements.txt 安装依赖"
|
if gr.__version__ not in ['3.28.3','3.32.2']: assert False, "需要特殊依赖,请务必用 pip install -r requirements.txt 指令安装依赖,详情信息见requirements.txt"
|
||||||
from request_llm.bridge_all import predict
|
from request_llm.bridge_all import predict
|
||||||
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
|
from toolbox import format_io, find_free_port, on_file_uploaded, on_report_generated, get_conf, ArgsGeneralWrapper, DummyWith
|
||||||
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
|
||||||
@ -56,7 +54,6 @@ def main():
|
|||||||
cancel_handles = []
|
cancel_handles = []
|
||||||
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
|
||||||
gr.HTML(title_html)
|
gr.HTML(title_html)
|
||||||
gr.HTML('''<center><a href="https://huggingface.co/spaces/qingxu98/gpt-academic?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>请您打开此页面后务必点击上方的“复制空间”(Duplicate Space)按钮!<font color="#FF00FF">使用时,先在输入框填入API-KEY然后回车。</font><br/>切忌在“复制空间”(Duplicate Space)之前填入API_KEY或进行提问,否则您的API_KEY将极可能被空间所有者攫取!<br/>支持任意数量的OpenAI的密钥和API2D的密钥共存,例如输入"OpenAI密钥1,API2D密钥2",然后提交,即可同时使用两种模型接口。</center>''')
|
|
||||||
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
|
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
|
||||||
with gr_L1():
|
with gr_L1():
|
||||||
with gr_L2(scale=2):
|
with gr_L2(scale=2):
|
||||||
@ -66,7 +63,7 @@ def main():
|
|||||||
with gr_L2(scale=1):
|
with gr_L2(scale=1):
|
||||||
with gr.Accordion("输入区", open=True) as area_input_primary:
|
with gr.Accordion("输入区", open=True) as area_input_primary:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
txt = gr.Textbox(show_label=False, lines=2, placeholder="输入问题或API密钥,输入多个密钥时,用英文逗号间隔。支持OpenAI密钥和API2D密钥共存。").style(container=False)
|
txt = gr.Textbox(show_label=False, placeholder="Input question here.").style(container=False)
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
submitBtn = gr.Button("提交", variant="primary")
|
submitBtn = gr.Button("提交", variant="primary")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
@ -200,7 +197,10 @@ def main():
|
|||||||
threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
|
threading.Thread(target=warm_up_modules, name="warm-up", daemon=True).start()
|
||||||
|
|
||||||
auto_opentab_delay()
|
auto_opentab_delay()
|
||||||
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(server_name="0.0.0.0", share=False, favicon_path="docs/logo.png", blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
|
demo.queue(concurrency_count=CONCURRENT_COUNT).launch(
|
||||||
|
server_name="0.0.0.0", server_port=PORT,
|
||||||
|
favicon_path="docs/logo.png", auth=AUTHENTICATION,
|
||||||
|
blocked_paths=["config.py","config_private.py","docker-compose.yml","Dockerfile"])
|
||||||
|
|
||||||
# 如果需要在二级路径下运行
|
# 如果需要在二级路径下运行
|
||||||
# CUSTOM_PATH, = get_conf('CUSTOM_PATH')
|
# CUSTOM_PATH, = get_conf('CUSTOM_PATH')
|
||||||
@ -152,7 +152,7 @@ model_info = {
|
|||||||
"token_cnt": get_token_num_gpt4,
|
"token_cnt": get_token_num_gpt4,
|
||||||
},
|
},
|
||||||
|
|
||||||
# 将 chatglm 直接对齐到 chatglm2
|
# chatglm
|
||||||
"chatglm": {
|
"chatglm": {
|
||||||
"fn_with_ui": chatglm_ui,
|
"fn_with_ui": chatglm_ui,
|
||||||
"fn_without_ui": chatglm_noui,
|
"fn_without_ui": chatglm_noui,
|
||||||
@ -161,15 +161,6 @@ model_info = {
|
|||||||
"tokenizer": tokenizer_gpt35,
|
"tokenizer": tokenizer_gpt35,
|
||||||
"token_cnt": get_token_num_gpt35,
|
"token_cnt": get_token_num_gpt35,
|
||||||
},
|
},
|
||||||
"chatglm2": {
|
|
||||||
"fn_with_ui": chatglm_ui,
|
|
||||||
"fn_without_ui": chatglm_noui,
|
|
||||||
"endpoint": None,
|
|
||||||
"max_token": 1024,
|
|
||||||
"tokenizer": tokenizer_gpt35,
|
|
||||||
"token_cnt": get_token_num_gpt35,
|
|
||||||
},
|
|
||||||
|
|
||||||
# newbing
|
# newbing
|
||||||
"newbing": {
|
"newbing": {
|
||||||
"fn_with_ui": newbing_ui,
|
"fn_with_ui": newbing_ui,
|
||||||
|
|||||||
@ -40,12 +40,12 @@ class GetGLMHandle(Process):
|
|||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
if self.chatglm_model is None:
|
if self.chatglm_model is None:
|
||||||
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
|
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
||||||
device, = get_conf('LOCAL_MODEL_DEVICE')
|
device, = get_conf('LOCAL_MODEL_DEVICE')
|
||||||
if device=='cpu':
|
if device=='cpu':
|
||||||
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
|
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
|
||||||
else:
|
else:
|
||||||
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
|
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
|
||||||
self.chatglm_model = self.chatglm_model.eval()
|
self.chatglm_model = self.chatglm_model.eval()
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
|||||||
@ -1,3 +1,4 @@
|
|||||||
|
./docs/gradio-3.32.2-py3-none-any.whl
|
||||||
tiktoken>=0.3.3
|
tiktoken>=0.3.3
|
||||||
requests[socks]
|
requests[socks]
|
||||||
transformers
|
transformers
|
||||||
|
|||||||
@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
|
|||||||
else:
|
else:
|
||||||
report_files = find_recent_files('gpt_log')
|
report_files = find_recent_files('gpt_log')
|
||||||
if len(report_files) == 0:
|
if len(report_files) == 0:
|
||||||
return cookies, None, chatbot
|
return None, chatbot
|
||||||
# files.extend(report_files)
|
# files.extend(report_files)
|
||||||
file_links = ''
|
file_links = ''
|
||||||
for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
|
for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'
|
||||||
|
|||||||
Reference in New Issue
Block a user