Compare commits

..

36 Commits

Author SHA1 Message Date
066b5d4d29 update pdf translate fn 2023-06-30 13:04:33 +08:00
2373348c9e 修改布局,删除本地LLM接口 2023-06-30 12:51:25 +08:00
3f8a145c1d Merge branch 'master' of https://github.com/kaixindelele/gpt_academic into chatpaper-master 2023-06-30 12:01:08 +08:00
22f377e2fb fix multi user cwd shift 2023-06-30 11:05:47 +08:00
37172906ef 修复文件导出的bug 2023-06-29 14:55:55 +08:00
3b78e0538b 修复插件demo的图像显示的问题 2023-06-29 14:52:58 +08:00
d8f9ac71d0 Merge pull request #907 from Xminry/master
feat:联网搜索功能,cn.bing.com版,国内可用
2023-06-29 12:44:32 +08:00
aced272d3c 微调插件提示 2023-06-29 12:43:50 +08:00
aff77a086d Merge branch 'master' of https://github.com/Xminry/gpt_academic into Xminry-master 2023-06-29 12:38:43 +08:00
49253c4dc6 [arxiv trans] add html comparison to zip file 2023-06-29 12:29:49 +08:00
1a00093015 修复提示 2023-06-29 12:15:52 +08:00
64f76e7401 3.42 2023-06-29 11:32:19 +08:00
eb4c07997e 修复Latex矫错和本地Latex论文翻译的问题 2023-06-29 11:30:42 +08:00
99cf7205c3 feat:联网搜索功能,cn.bing.com版,国内可用 2023-06-28 10:30:08 +08:00
d684b4cdb3 Merge pull request #905 from Xminry/master
Update 理解PDF文档内容.py
2023-06-27 23:37:25 +08:00
146fde30b8 Update latex_utils.py 2023-06-27 17:31:09 +08:00
e79dcb1b48 fix bugs of baidu stats 2023-06-26 20:27:31 -07:00
0aadeabccc add baidu stats 2023-06-27 10:05:36 +08:00
4290821a50 Update 理解PDF文档内容.py 2023-06-27 01:57:31 +08:00
4bc073b072 readme 2023-06-23 10:53:29 +08:00
756bd29f0c Update main.py 2023-06-21 18:49:57 +08:00
66c9e9a3cf arxiv_cache 2023-06-21 18:04:37 +08:00
ca49af1e53 functional 2023-06-21 18:02:08 +08:00
78df094eb9 update 2023-06-21 17:58:13 +08:00
b24e664a85 Update latex_utils.py 2023-06-20 21:06:34 +08:00
af3a1901a0 Update Latex输出PDF结果.py 2023-06-20 21:06:13 +08:00
8affcd92a9 Update crazy_functional.py 2023-06-20 21:04:49 +08:00
d83e0a7704 Update main.py 2023-06-20 21:04:32 +08:00
78c53b6bec Update crazy_functional.py 2023-06-19 20:58:38 +08:00
84e09766cd Rename build-image to build-image.yaml 2023-06-19 17:16:26 +08:00
a84f4f43bf delete unused action 2023-06-19 17:15:32 +08:00
cb7f6984a2 Create build-image 2023-06-19 17:07:09 +08:00
5703beb06b Merge pull request #1 from kaixindelele/patch-1
Patch 1
2023-06-19 15:18:42 +08:00
fcb0f466b9 Update crazy_functional.py 2023-06-19 15:11:32 +08:00
1b31d2e0d5 Merge branch 'binary-husky:master' into master 2023-06-19 14:26:41 +08:00
baa26e67ef arxiv_cache 2023-06-18 13:37:32 +08:00
19 changed files with 361 additions and 262 deletions

38
.github/workflows/build-image.yaml vendored Normal file
View File

@ -0,0 +1,38 @@
name: Build Image
on:
workflow_dispatch:
inputs:
release_tag:
description: 'Tag for the images'
required: true
env:
REGISTRY: registry.cn-hongkong.aliyuncs.com
NAMESPACE: chatwithpaper
IMAGE: academic
TAG: ${{ github.event.inputs.release_tag || github.event.client_payload.release_tag }}
jobs:
build:
runs-on: ubuntu-latest
environment: production
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Login to Registry
uses: docker/login-action@v2.1.0
with:
registry: "${{ env.REGISTRY }}"
username: "${{ secrets.ACR_USER }}"
password: "${{ secrets.ACR_PASSWORD }}"
- name: Build and push image
uses: docker/build-push-action@v4
with:
context: .
file: docs/Dockerfile+NoLocal+Latex
tags: ${{ env.REGISTRY }}/${{ env.NAMESPACE }}/${{ env.IMAGE }}:${{ env.TAG }}
push: true

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_chatglm_moss
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+ChatGLM+Moss
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_jittorllms
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+JittorLLMs
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -1,44 +0,0 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_nolocal
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+NoLocal
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -47,7 +47,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 ) # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓ LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"] AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"] # P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU # 本地LLM模型如ChatGLM的执行方式 CPU/GPU

View File

@ -42,6 +42,7 @@ def get_core_functions():
"中译英": { "中译英": {
"Prefix": r"Please translate following sentence to English:" + "\n\n", "Prefix": r"Please translate following sentence to English:" + "\n\n",
"Suffix": r"", "Suffix": r"",
"Visible": False,
}, },
"学术中英互译": { "学术中英互译": {
"Prefix": r"I want you to act as a scientific English-Chinese translator, " + "Prefix": r"I want you to act as a scientific English-Chinese translator, " +
@ -63,6 +64,7 @@ def get_core_functions():
"Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL" + "Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL" +
r"然后请使用Markdown格式封装并且不要有反斜线不要用代码块。现在请按以下描述给我发送图片" + "\n\n", r"然后请使用Markdown格式封装并且不要有反斜线不要用代码块。现在请按以下描述给我发送图片" + "\n\n",
"Suffix": r"", "Suffix": r"",
"Visible": False,
}, },
"解释代码": { "解释代码": {
"Prefix": r"请解释以下代码:" + "\n```\n", "Prefix": r"请解释以下代码:" + "\n```\n",

View File

@ -26,6 +26,7 @@ def get_crazy_functions():
from crazy_functions.对话历史存档 import 删除所有本地对话历史记录 from crazy_functions.对话历史存档 import 删除所有本地对话历史记录
from crazy_functions.批量Markdown翻译 import Markdown英译中 from crazy_functions.批量Markdown翻译 import Markdown英译中
function_plugins = { function_plugins = {
"解析整个Python项目": { "解析整个Python项目": {
"Color": "stop", # 按钮颜色 "Color": "stop", # 按钮颜色
@ -47,10 +48,10 @@ def get_crazy_functions():
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "若输入0则不解析notebook中的Markdown块", # 高级参数输入区的显示提示 "ArgsReminder": "若输入0则不解析notebook中的Markdown块", # 高级参数输入区的显示提示
}, },
"批量总结Word文档": { # "批量总结Word文档": {
"Color": "stop", # "Color": "stop",
"Function": HotReload(总结word文档) # "Function": HotReload(总结word文档)
}, # },
"解析整个C++项目头文件": { "解析整个C++项目头文件": {
"Color": "stop", # 按钮颜色 "Color": "stop", # 按钮颜色
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
@ -108,10 +109,10 @@ def get_crazy_functions():
"保存当前的对话": { "保存当前的对话": {
"Function": HotReload(对话历史存档) "Function": HotReload(对话历史存档)
}, },
"[多线程Demo] 解析此项目本身(源码自译解)": { # "[多线程Demo] 解析此项目本身(源码自译解)": {
"AsButton": False, # 加入下拉菜单中 # "AsButton": False, # 加入下拉菜单中
"Function": HotReload(解析项目本身) # "Function": HotReload(解析项目本身)
}, # },
# "[老旧的Demo] 把本项目源代码切换成全英文": { # "[老旧的Demo] 把本项目源代码切换成全英文": {
# # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效 # # HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
# "AsButton": False, # 加入下拉菜单中 # "AsButton": False, # 加入下拉菜单中
@ -137,15 +138,15 @@ def get_crazy_functions():
from crazy_functions.批量Markdown翻译 import Markdown中译英 from crazy_functions.批量Markdown翻译 import Markdown中译英
function_plugins.update({ function_plugins.update({
"批量翻译PDF文档多线程": { "本地PDF全文翻译": {
"Color": "stop", "Color": "stop",
"AsButton": True, # 加入下拉菜单中 "AsButton": True, # 加入下拉菜单中
"Function": HotReload(批量翻译PDF文档) "Function": HotReload(批量翻译PDF文档)
}, },
"询问多个GPT模型": { # "询问多个GPT模型": {
"Color": "stop", # 按钮颜色 # "Color": "stop", # 按钮颜色
"Function": HotReload(同时问询) # "Function": HotReload(同时问询)
}, # },
"[测试功能] 批量总结PDF文档": { "[测试功能] 批量总结PDF文档": {
"Color": "stop", "Color": "stop",
"AsButton": False, # 加入下拉菜单中 "AsButton": False, # 加入下拉菜单中
@ -222,46 +223,57 @@ def get_crazy_functions():
}) })
except: except:
print('Load function plugin failed') print('Load function plugin failed')
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
try:
from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({ function_plugins.update({
"连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌": { "ArXiv Latex一键翻译输入区给定arXiv ID": {
"Color": "stop", "Color": "stop",
"AsButton": False, # 加入下拉菜单中 "AsButton": True,
"Function": HotReload(连接网络回答问题) "AdvancedArgs": True,
"ArgsReminder":
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
} }
}) })
except: # try:
print('Load function plugin failed') # from crazy_functions.联网的ChatGPT import 连接网络回答问题
# function_plugins.update({
# "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": {
# "Color": "stop",
# "AsButton": False, # 加入下拉菜单中
# "Function": HotReload(连接网络回答问题)
# }
# })
# except:
# print('Load function plugin failed')
try: # try:
from crazy_functions.解析项目源代码 import 解析任意code项目 # from crazy_functions.解析项目源代码 import 解析任意code项目
function_plugins.update({ # function_plugins.update({
"解析项目源代码(手动指定和筛选源代码文件类型)": { # "解析项目源代码(手动指定和筛选源代码文件类型)": {
"Color": "stop", # "Color": "stop",
"AsButton": False, # "AsButton": False,
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False # "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示 # "ArgsReminder": "输入时用逗号隔开, *代表通配符, 加了^代表不匹配; 不输入代表全部匹配。例如: \"*.c, ^*.cpp, config.toml, ^*.toml\"", # 高级参数输入区的显示提示
"Function": HotReload(解析任意code项目) # "Function": HotReload(解析任意code项目)
}, # },
}) # })
except: # except:
print('Load function plugin failed') # print('Load function plugin failed')
try: # try:
from crazy_functions.询问多个大语言模型 import 同时问询_指定模型 # from crazy_functions.询问多个大语言模型 import 同时问询_指定模型
function_plugins.update({ # function_plugins.update({
"询问多个GPT模型手动指定询问哪些模型": { # "询问多个GPT模型手动指定询问哪些模型": {
"Color": "stop", # "Color": "stop",
"AsButton": False, # "AsButton": False,
"AdvancedArgs": True, # 调用时唤起高级参数输入区默认False # "AdvancedArgs": True, # 调用时唤起高级参数输入区默认False
"ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示 # "ArgsReminder": "支持任意数量的llm接口用&符号分隔。例如chatglm&gpt-3.5-turbo&api2d-gpt-4", # 高级参数输入区的显示提示
"Function": HotReload(同时问询_指定模型) # "Function": HotReload(同时问询_指定模型)
}, # },
}) # })
except: # except:
print('Load function plugin failed') # print('Load function plugin failed')
try: try:
from crazy_functions.图片生成 import 图片生成 from crazy_functions.图片生成 import 图片生成
@ -348,26 +360,15 @@ def get_crazy_functions():
try: try:
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比 from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
function_plugins.update({ function_plugins.update({
"[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": { "Latex英文纠错+高亮修正位置 [需Latex]": {
"Color": "stop",
"AsButton": False,
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(Latex英文纠错加PDF对比)
}
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
"Arixv翻译输入arxivID [需Latex]": {
"Color": "stop", "Color": "stop",
"AsButton": False, "AsButton": False,
"AdvancedArgs": True, "AdvancedArgs": True,
"ArgsReminder": "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
"如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+ "Function": HotReload(Latex英文纠错加PDF对比)
"例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
} }
}) })
# function_plugins.update({ # function_plugins.update({
# "本地论文翻译上传Latex压缩包 [需Latex]": { # "本地论文翻译上传Latex压缩包 [需Latex]": {
# "Color": "stop", # "Color": "stop",
@ -396,4 +397,18 @@ def get_crazy_functions():
# except: # except:
# print('Load function plugin failed') # print('Load function plugin failed')
# try:
# from crazy_functions.虚空终端 import 终端
# function_plugins.update({
# "超级终端": {
# "Color": "stop",
# "AsButton": False,
# # "AdvancedArgs": True,
# # "ArgsReminder": "",
# "Function": HotReload(终端)
# }
# })
# except:
# print('Load function plugin failed')
return function_plugins return function_plugins

View File

@ -3,7 +3,9 @@ from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip
from functools import partial from functools import partial
import glob, os, requests, time import glob, os, requests, time
pj = os.path.join pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/") # ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
ARXIV_CACHE_DIR = os.getenv("Arxiv_Cache")
# =================================== 工具函数 =============================================== # =================================== 工具函数 ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". ' 专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
@ -19,9 +21,9 @@ def switch_prompt(pfg, mode, more_requirement):
- sys_prompt_array: A list of strings containing prompts for system prompts. - sys_prompt_array: A list of strings containing prompts for system prompts.
""" """
n_split = len(pfg.sp_file_contents) n_split = len(pfg.sp_file_contents)
if mode == 'proofread': if mode == 'proofread_en':
inputs_array = [r"Below is a section from an academic paper, proofread this section." + inputs_array = [r"Below is a section from an academic paper, proofread this section." +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
r"Answer me only with the revised text:" + r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents] f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)] sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@ -70,6 +72,12 @@ def move_project(project_folder, arxiv_id=None):
shutil.rmtree(new_workfolder) shutil.rmtree(new_workfolder)
except: except:
pass pass
# align subfolder if there is a folder wrapper
items = glob.glob(pj(project_folder,'*'))
if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
if os.path.isdir(items[0]): project_folder = items[0]
shutil.copytree(src=project_folder, dst=new_workfolder) shutil.copytree(src=project_folder, dst=new_workfolder)
return new_workfolder return new_workfolder
@ -142,6 +150,10 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
"对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4其他模型转化效果未知。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"]) "对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4其他模型转化效果未知。目前对机器学习类文献转化效果最好其他类型文献转化效果未知。仅在Windows系统进行了测试其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------------- more requirements ------------->
if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
more_req = plugin_kwargs.get("advanced_arg", "")
_switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps -------------> # <-------------- check deps ------------->
try: try:
@ -186,7 +198,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- compile PDF -------------> # <-------------- compile PDF ------------->
success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread', success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder) work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
@ -195,6 +207,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
if success: if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...')) chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else: else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@ -278,6 +291,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
if success: if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...')) chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else: else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...')) chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面

View File

@ -190,11 +190,12 @@ def test_Latex():
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE # txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
# txt = r"https://arxiv.org/abs/2002.09253" # txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831" # txt = r"https://arxiv.org/abs/2306.07831"
txt = r"https://arxiv.org/abs/2212.10156" # txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559" # txt = r"https://arxiv.org/abs/2211.11559"
# txt = r"https://arxiv.org/abs/2303.08774" # txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712" # txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder" # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"C:\Users\fuqingxu\Desktop\9"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):

View File

@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0):
mask[res.span()[0]:res.span()[1]] = PRESERVE mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0): def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
""" """
Add a preserve text area in this paper (text become untouchable for GPT). Add a preserve text area in this paper (text become untouchable for GPT).
@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
# reverse 操作必须放在最后 # reverse 操作必须放在最后
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True) text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True) text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
root = convert_to_linklist(text, mask) root = convert_to_linklist(text, mask)
# 修复括号 # 修复括号
@ -432,7 +451,8 @@ class LatexPaperSplit():
self.nodes = None self.nodes = None
self.msg = "{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \ self.msg = "{\\scriptsize\\textbf{警告该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成" + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。" "项目Github地址: \\url{https://github.com/binary-husky/gpt_academic/}。" + \
"项目在线体验地址: \\url{https://chatpaper.org}。"
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者 # 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\" self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
@ -532,11 +552,11 @@ class LatexPaperFileGroup():
f.write(res) f.write(res)
return manifest return manifest
def write_html(sp_file_contents, sp_file_result, chatbot): def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html # write html
try: try:
import copy import shutil
from .crazy_utils import construct_html from .crazy_utils import construct_html
from toolbox import gen_time_str from toolbox import gen_time_str
ch = construct_html() ch = construct_html()
@ -554,6 +574,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot):
ch.add_row(a=orig, b=trans) ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html" create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name) ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot) promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except: except:
from toolbox import trimmed_format_exc from toolbox import trimmed_format_exc
@ -634,7 +655,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
pfg.get_token_num = None pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl')) objdump(pfg, file=pj(project_folder,'temp.pkl'))
write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot) write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
# <-------- 写出文件 ----------> # <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}" msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
@ -671,10 +692,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.") print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1] return False, -1, [-1]
def compile_latex_with_timeout(command, cwd, timeout=60):
def compile_latex_with_timeout(command, timeout=60):
import subprocess import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try: try:
stdout, stderr = process.communicate(timeout=timeout) stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
@ -698,24 +718,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')): if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
# 只有第二步成功,才能继续下面的步骤 # 只有第二步成功,才能继续下面的步骤
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')): if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')): if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
if mode!='translate_zh': if mode!='translate_zh':
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@ -723,13 +743,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex') ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir) ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
# <--------------------->
os.chdir(current_dir)
# <---------- 检查结果 -----------> # <---------- 检查结果 ----------->
results_ = "" results_ = ""
@ -741,13 +759,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
results_ += f"对比PDF编译是否成功: {diff_pdf_success};" results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
yield from update_ui_lastest_msg(f'{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
if diff_pdf_success:
result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
if modified_pdf_success: if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(current_dir) result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')): if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf')) shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
return True # 成功啦 return True # 成功啦
else: else:
if n_fix>=max_try: break if n_fix>=max_try: break
@ -763,7 +783,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面 yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break if not can_retry: break
os.chdir(current_dir)
return False # 失败啦 return False # 失败啦

View File

@ -1,5 +1,5 @@
from toolbox import CatchException, report_execption, write_results_to_file from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import update_ui from toolbox import update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text from .crazy_utils import read_and_clean_pdf_text
@ -147,23 +147,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
print('writing html result failed:', trimmed_format_exc()) print('writing html result failed:', trimmed_format_exc())
# 准备文件的下载 # 准备文件的下载
import shutil
for pdf_path in generated_conclusion_files: for pdf_path in generated_conclusion_files:
# 重命名文件 # 重命名文件
rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}' rename_file = f'翻译-{os.path.basename(pdf_path)}'
if os.path.exists(rename_file): promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot)
os.remove(rename_file)
shutil.copyfile(pdf_path, rename_file)
if os.path.exists(pdf_path):
os.remove(pdf_path)
for html_path in generated_html_files: for html_path in generated_html_files:
# 重命名文件 # 重命名文件
rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}' rename_file = f'翻译-{os.path.basename(html_path)}'
if os.path.exists(rename_file): promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot)
os.remove(rename_file)
shutil.copyfile(html_path, rename_file)
if os.path.exists(html_path):
os.remove(html_path)
chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files))) chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@ -13,6 +13,8 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
# 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割 # 递归地切割PDF文件每一块尽量是完整的一个section比如introductionexperiment等必要时再进行切割
# 的长度必须小于 2500 个 Token # 的长度必须小于 2500 个 Token
file_content, page_one = read_and_clean_pdf_text(file_name) # 尝试按照章节切割PDF file_content, page_one = read_and_clean_pdf_text(file_name) # 尝试按照章节切割PDF
file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
TOKEN_LIMIT_PER_FRAGMENT = 2500 TOKEN_LIMIT_PER_FRAGMENT = 2500

View File

@ -0,0 +1,102 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llm.bridge_all import model_info
def bing_search(query, proxies=None):
query = query
url = f"https://cn.bing.com/search?q={query}"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
response = requests.get(url, headers=headers, proxies=proxies)
soup = BeautifulSoup(response.content, 'html.parser')
results = []
for g in soup.find_all('li', class_='b_algo'):
anchors = g.find_all('a')
if anchors:
link = anchors[0]['href']
if not link.startswith('http'):
continue
title = g.find('h2').text
item = {'title': title, 'link': link}
results.append(item)
for r in results:
print(r['link'])
return results
def scrape_text(url, proxies) -> str:
"""Scrape text from a webpage
Args:
url (str): The URL to scrape text from
Returns:
str: The scraped text
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
'Content-Type': 'text/plain',
}
try:
response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
except:
return "无法连接到该网页"
soup = BeautifulSoup(response.text, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = "\n".join(chunk for chunk in chunks if chunk)
return text
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
"[Local Message] 请注意,您正在调用一个[函数插件]的模板该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者它可以作为创建新功能函数的模板。您若希望分享新的功能模组请不吝PR"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第1步爬取搜索引擎的结果 > -------------
from toolbox import get_conf
proxies, = get_conf('proxies')
urls = bing_search(txt, proxies)
history = []
# ------------- < 第2步依次访问网页 > -------------
max_search_result = 8 # 最多收纳多少个网页的结果
for index, url in enumerate(urls[:max_search_result]):
res = scrape_text(url['link'], proxies)
history.extend([f"{index}份搜索结果:", res])
chatbot.append([f"{index}份搜索结果:", res[:500]+"......"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# ------------- < 第3步ChatGPT综合 > -------------
i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
i_say, history = input_clipping( # 裁剪输入从最长的条目开始裁剪防止爆token
inputs=i_say,
history=history,
max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
)
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新

View File

@ -1,6 +1,7 @@
from toolbox import CatchException, update_ui from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import datetime import datetime, re
@CatchException @CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port): def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
""" """
@ -18,12 +19,34 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
for i in range(5): for i in range(5):
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
i_say = f'历史中哪些事件发生在{currentMonth}{currentDay}日?列举两条并发送相关图片。发送图片时,使用Markdown将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词' i_say = f'历史中哪些事件发生在{currentMonth}{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive( gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say, inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[], llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt="当你想发送一张照片时请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。" sys_prompt='输出格式示例1908年美国消防救援事业发展的“美国消防协会”成立。关键词{"KeyWords":["Fire","American"]}。'
) )
gpt_say = get_images(gpt_say)
chatbot[-1] = (i_say, gpt_say) chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say) history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新 yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
def get_images(gpt_say):
def get_image_by_keyword(keyword):
import requests
from bs4 import BeautifulSoup
response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
if "data-src" in image_element: break
return image_element["data-src"]
for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say):
keywords = [n.strip('"') for n in keywords.split(',')]
try:
description = keywords[0]
url = get_image_by_keyword(keywords[0])
img_tag = f"\n\n![{description}]({url})"
gpt_say += img_tag
except:
continue
return gpt_say

View File

@ -96,15 +96,6 @@
● 部署名(不是模型名) ● 部署名(不是模型名)
# 修改 config.py
```
AZURE_ENDPOINT = "填入终结点"
AZURE_API_KEY = "填入azure openai api的密钥"
AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改
AZURE_ENGINE = "填入部署名"
```
# API的使用 # API的使用
接下来就是具体怎么使用API了还是可以参考官方文档[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python) 接下来就是具体怎么使用API了还是可以参考官方文档[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)

27
main.py
View File

@ -1,4 +1,5 @@
import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染 import os; os.environ['no_proxy'] = '*' # 避免代理网络产生意外污染
from pathlib import Path
def main(): def main():
import gradio as gr import gradio as gr
@ -15,7 +16,7 @@ def main():
from check_proxy import get_current_version from check_proxy import get_current_version
initial_prompt = "Serve me as a writing and programming assistant." initial_prompt = "Serve me as a writing and programming assistant."
title_html = f"<h1 align=\"center\">ChatGPT 学术优化 {get_current_version()}</h1>" title_html = f"<h1 align=\"center\">ChatGPT 学术优化 网页测试版 {get_current_version()}</h1>"
description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)""" description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
# 问询记录, python 版本建议3.9+(越新越好) # 问询记录, python 版本建议3.9+(越新越好)
@ -52,7 +53,21 @@ def main():
CHATBOT_HEIGHT /= 2 CHATBOT_HEIGHT /= 2
cancel_handles = [] cancel_handles = []
# Read your Baidu statistics code from the file
baidu_stats_code = Path('./sites/baidu_stats.html').read_text()
with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo: with gr.Blocks(title="ChatGPT 学术优化", theme=set_theme, analytics_enabled=False, css=advanced_css) as demo:
# Insert your Baidu statistics code here
gradio_original_template_fn = gr.routes.templates.TemplateResponse
def gradio_new_template_fn(*args, **kwargs):
res = gradio_original_template_fn(*args, **kwargs)
res.body = res.body.replace(b'</html>', f'{baidu_stats_code}</html>'.encode("utf8"))
res.init_headers()
return res
gr.routes.templates.TemplateResponse = gradio_new_template_fn # override gradio template
# Insert Title
gr.HTML(title_html) gr.HTML(title_html)
cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL}) cookies = gr.State({'api_key': API_KEY, 'llm_model': LLM_MODEL})
with gr_L1(): with gr_L1():
@ -71,7 +86,13 @@ def main():
stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm") stopBtn = gr.Button("停止", variant="secondary"); stopBtn.style(size="sm")
clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm") clearBtn = gr.Button("清除", variant="secondary", visible=False); clearBtn.style(size="sm")
with gr.Row(): with gr.Row():
status = gr.Markdown(f"Tip: 按Enter提交, 按Shift+Enter换行当前模型: {LLM_MODEL} \n {proxy_info}") status = gr.Markdown(f"""Tips: 1. 按Enter提交, 按Shift+Enter换行2. 当前模型: {LLM_MODEL} \n {proxy_info}.
3. 请注意隐私保护和遵守法律法规;
4. 请勿使用本服务进行违法犯罪活动;
5. 我和qingxu都希望能够为大家提供一个好的**学术工具**,希望大家不要攻击和滥用本服务;
6. 本服务还存在各种bug如果发现bug欢迎加群反馈或者发issue告诉我们
7. 希望大家能结合ChatPaper的速读找到需要精读的再用本工具的全文翻译实现快速知识摄取。
""")
with gr.Accordion("基础功能区", open=True) as area_basic_fn: with gr.Accordion("基础功能区", open=True) as area_basic_fn:
with gr.Row(): with gr.Row():
for k in functional: for k in functional:
@ -167,6 +188,7 @@ def main():
ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")}) ret.update({plugin_advanced_arg: gr.update(visible=False, label=f"插件[{k}]不需要高级参数。")})
return ret return ret
dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] ) dropdown.select(on_dropdown_changed, [dropdown], [switchy_bt, plugin_advanced_arg] )
def on_md_dropdown_changed(k): def on_md_dropdown_changed(k):
return {chatbot: gr.update(label="当前模型:"+k)} return {chatbot: gr.update(label="当前模型:"+k)}
md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] ) md_dropdown.select(on_md_dropdown_changed, [md_dropdown], [chatbot] )
@ -180,6 +202,7 @@ def main():
# 终止按钮的回调函数注册 # 终止按钮的回调函数注册
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles) stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
demo.load(on_dropdown_changed, inputs=gr.State("ArXiv Latex一键翻译输入区给定arXiv ID"), outputs=[switchy_bt, plugin_advanced_arg])
# gradio的inbrowser触发不太稳定回滚代码到原始的浏览器打开函数 # gradio的inbrowser触发不太稳定回滚代码到原始的浏览器打开函数
def auto_opentab_delay(): def auto_opentab_delay():

10
sites/baidu_stats.html Normal file
View File

@ -0,0 +1,10 @@
<!-- baidu_stats.html -->
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "https://hm.baidu.com/hm.js?208673d55832a94b9bbe10b1f4e70c09";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>

View File

@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
else: else:
report_files = find_recent_files('gpt_log') report_files = find_recent_files('gpt_log')
if len(report_files) == 0: if len(report_files) == 0:
return None, chatbot return cookies, None, chatbot
# files.extend(report_files) # files.extend(report_files)
file_links = '' file_links = ''
for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>' for f in report_files: file_links += f'<br/><a href="file={os.path.abspath(f)}" target="_blank">{f}</a>'

View File

@ -1,5 +1,5 @@
{ {
"version": 3.41, "version": 3.42,
"show_feature": true, "show_feature": true,
"new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持" "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
} }