Compare commits

...

34 Commits

Author SHA1 Message Date
ec1cfaadba pip 2023-07-28 12:28:04 +08:00
2747c23868 Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-07-28 10:35:50 +08:00
f446dbb62d Update README.md 2023-07-28 09:54:03 +08:00
8d37d94e2c Update README.md 2023-07-28 09:53:17 +08:00
4216c5196e verify ignore history practice 2023-07-27 22:30:55 +08:00
2df660a718 Merge pull request #992 from yangchuansheng/master
Update README.md
2023-07-26 22:46:43 +08:00
bb496a9c2c Update README.md 2023-07-26 22:46:21 +08:00
4e0737c0c2 Update README.md 2023-07-26 22:46:02 +08:00
4bb3cba5c8 Update README.md 2023-07-26 18:53:42 +08:00
08b9b0d140 improve audio assistant documents 2023-07-26 18:51:33 +08:00
3577a72a3b add audio assistant docker compose solution 2023-07-26 18:39:32 +08:00
0328d6f498 add ALIYUN ACCESSKEY SECRET 2023-07-26 18:28:15 +08:00
d437305a4f add audio assistant docker 2023-07-26 18:16:59 +08:00
c4899bcb20 long-term aliyun access 2023-07-26 18:09:28 +08:00
4295764f8c Update README.md
添加 Sealos 部署方案
2023-07-25 16:38:37 +08:00
e4e2430255 version 3.47 2023-07-24 19:58:47 +08:00
1732127a28 Merge pull request #979 from fenglui/master
增加chatGLM int4配置支持 小显存也可以选择chatGLM
2023-07-24 19:52:27 +08:00
56bb8b6498 improve re efficiency 2023-07-24 18:50:29 +08:00
e93b6fa3a6 Add GLM INT8 2023-07-24 18:19:57 +08:00
dd4ba0ea22 Merge branch 'master' of https://github.com/fenglui/gpt_academic into fenglui-master 2023-07-24 18:06:15 +08:00
c2701c9ce5 Merge pull request #986 from one-pr/git-clone
默认仅 clone 最新的代码,减小 git clone 的大小
2023-07-24 17:48:35 +08:00
2f019ce359 优化 README.md 中的其他 git clone 2023-07-24 15:14:48 +08:00
c5b147aeb7 默认仅 clone 最新的代码,减小 git clone 的大小 2023-07-24 15:14:42 +08:00
5813d65e52 增加chatGLM int4配置支持 小显存也可以选择chatGLM 2023-07-22 08:29:15 +08:00
a393edfaa4 ALLOW CUSTOM API KEY PATTERN 2023-07-21 22:49:07 +08:00
dd7a01cda5 Merge pull request #976 from fenglui/master
fix msg.data.split(DELIMITER) exception when msg.data is int
2023-07-21 17:02:29 +08:00
00a3b91f95 fix msg.data.split(DELIMITER) exception when msg.data is int 2023-07-21 03:51:33 +08:00
61ba544282 add latex test samples 2023-07-20 19:49:23 +08:00
b5b8c123e4 latex plugin stability improvement 2023-07-20 19:39:22 +08:00
d9ceba959f expand range after failure 2023-07-20 18:39:02 +08:00
6b5b040701 remove pdf merge 2023-07-20 18:29:06 +08:00
4f4c09a5f3 增强Latex修复能力 2023-07-20 18:08:22 +08:00
067bc97cce Merge branch 'interface-interlm' of https://github.com/binary-husky/chatgpt_academic into interface-interlm 2023-07-20 12:46:52 +08:00
7368580cd6 concat pdf after translation 2023-07-20 12:46:48 +08:00
30 changed files with 856 additions and 544 deletions

View File

@ -0,0 +1,44 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: build-with-audio-assistant
on:
push:
branches:
- 'master'
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}_audio_assistant
jobs:
build-and-push-image:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build and push Docker image
uses: docker/build-push-action@v4
with:
context: .
push: true
file: docs/GithubAction+NoLocal+AudioAssistant
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

View File

@ -93,7 +93,7 @@ Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法
1. 下载项目
```sh
git clone https://github.com/binary-husky/gpt_academic.git
git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
cd gpt_academic
```
@ -116,7 +116,7 @@ python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步
```
<details><summary>如果需要支持清华ChatGLM2/复旦MOSS作为后端请点击展开此处</summary>
<details><summary>如果需要支持清华ChatGLM2/复旦MOSS/RWKV作为后端,请点击展开此处</summary>
<p>
【可选步骤】如果需要支持清华ChatGLM2/复旦MOSS作为后端需要额外安装更多依赖前提条件熟悉Python + 用过Pytorch + 电脑配置够强):
@ -126,9 +126,12 @@ python -m pip install -r request_llm/requirements_chatglm.txt
# 【可选步骤II】支持复旦MOSS
python -m pip install -r request_llm/requirements_moss.txt
git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # 注意执行此行代码时,必须处于项目根路径
git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss # 注意执行此行代码时,必须处于项目根路径
# 【可选步骤III】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型目前支持的全部模型如下(jittorllms系列目前仅支持docker方案)
# 【可选步骤III】支持RWKV Runner
参考wikihttps://github.com/binary-husky/gpt_academic/wiki/%E9%80%82%E9%85%8DRWKV-Runner
# 【可选步骤IV】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型目前支持的全部模型如下(jittorllms系列目前仅支持docker方案)
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
```
@ -147,9 +150,10 @@ python main.py
1. 仅ChatGPT推荐大多数人选择等价于docker-compose方案1
[![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
[![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
[![basicaudio](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-audio-assistant.yml)
``` sh
git clone https://github.com/binary-husky/gpt_academic.git # 下载项目
git clone --depth=1 https://github.com/binary-husky/gpt_academic.git # 下载项目
cd gpt_academic # 进入路径
nano config.py # 用任意文本编辑器编辑config.py, 配置 “Proxy” “API_KEY” 以及 “WEB_PORT” (例如50923) 等
docker build -t gpt-academic . # 安装
@ -195,10 +199,12 @@ docker-compose up
5. 远程云服务器部署(需要云服务器知识与经验)。
请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
6. 使用WSL2Windows Subsystem for Linux 子系统)
6. 使用Sealos[一键部署](https://github.com/binary-husky/gpt_academic/issues/993)
7. 使用WSL2Windows Subsystem for Linux 子系统)。
请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
7. 如何在二级网址(如`http://localhost/subpath`)下运行。
8. 如何在二级网址(如`http://localhost/subpath`)下运行。
请访问[FastAPI运行说明](docs/WithFastapi.md)

View File

@ -80,6 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本
# 设置gradio的并行线程数不需要修改
@ -131,9 +132,14 @@ put your new bing cookies here
# 阿里云实时语音识别 配置难度较高 仅建议高手用户使用 参考 https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
ENABLE_AUDIO = False
ALIYUN_TOKEN="" # 例如 f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK
ALIYUN_TOKEN="" # 例如 f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK
ALIYUN_ACCESSKEY="" # (无需填写)
ALIYUN_SECRET="" # (无需填写)
# Claude API KEY
ANTHROPIC_API_KEY = ""
ANTHROPIC_API_KEY = ""
# 自定义API KEY格式
CUSTOM_API_KEY_PATTERN = ""

View File

@ -1,7 +1,7 @@
# 'primary' 颜色对应 theme.py 中的 primary_hue
# 'secondary' 颜色对应 theme.py 中的 neutral_hue
# 'stop' 颜色对应 theme.py 中的 color_er
# 默认按钮颜色是 secondary
import importlib
from toolbox import clear_line_break
@ -14,7 +14,12 @@ def get_core_functions():
r"Furthermore, list all modification and explain the reasons to do so in markdown table." + "\n\n",
# 后语
"Suffix": r"",
"Color": r"secondary", # 按钮颜色
# 按钮颜色 (默认 secondary)
"Color": r"secondary",
# 按钮是否可见 (默认 True即可见)
"Visible": True,
# 是否在触发时清除历史 (默认 False即不处理之前的对话历史)
"AutoClearHistory": True
},
"中文学术润色": {
"Prefix": r"作为一名中文学术论文写作改进助理,你的任务是改进所提供文本的拼写、语法、清晰、简洁和整体可读性," +
@ -76,3 +81,13 @@ def get_core_functions():
"Suffix": r"",
}
}
def handle_core_functionality(additional_fn, inputs, history):
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
history = [] if core_functional[additional_fn].get("AutoClearHistory", False) else history
return inputs, history

View File

@ -157,7 +157,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
@ -234,7 +234,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])

View File

@ -195,9 +195,12 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = "https://arxiv.org/abs/2205.14135"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
txt = r"2210.03629"
txt = r"2307.04964"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@ -240,7 +243,7 @@ if __name__ == "__main__":
# test_数学动画生成manim()
# test_Langchain知识库()
# test_Langchain知识库读取()
# test_Latex()
test_chatglm_finetune()
test_Latex()
# test_chatglm_finetune()
input("程序完成,回车退出。")
print("退出。")

View File

@ -1,320 +1,16 @@
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
import os, shutil
import re
import numpy as np
pj = os.path.join
"""
========================================================================
Part One
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
Add it to preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
"""
========================================================================
Latex Merge File
========================================================================
"""
def 寻找Latex主文件(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个
P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2 通过一些Latex模板中常见但通常不会出现在正文的单词对不同latex源文件扣分取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # 取评分最高者返回
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# 删除整行的空注释
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recrusively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recrusively
P.S. 顺便把CTEX塞进去以支持中文
P.S. 顺便把Latex的注释去除
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
========================================================================
Post process
========================================================================
"""
def mod_inbraket(match):
"""
为啥chatgpt会把cite里面的逗号换成中文逗号呀
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace('', ':') # 前面是中文冒号,后面是英文冒号
str_to_modify = str_to_modify.replace('', ',') # 前面是中文逗号,后面是英文逗号
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# 出问题了,还原部分原文,保证括号正确
final_tex = join_most(final_tex, node_string)
return final_tex
def split_subprocess(txt, project_folder, return_dict, opts):
"""
@ -326,7 +22,8 @@ def split_subprocess(txt, project_folder, return_dict, opts):
mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
# 吸收title与作者以上的部分
text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
# 吸收iffalse注释
text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
# 吸收在42行以内的begin-end组合
@ -356,77 +53,9 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
root = convert_to_linklist(text, mask)
# 修复括号
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
# 最后一步处理,增强稳健性
root = post_process(root)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# 屏蔽空行和太短的句子
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# 将前后断行符脱离
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# 输出html调试文件用红色标注处保留区PRESERVE用黑色标注转换区TRANSFORM
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
segment_parts_for_gpt = []
@ -437,7 +66,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
show_html = node.string.replace('\n','<br/>')
if not node.preserve:
segment_parts_for_gpt.append(node.string)
f.write(f'<p style="color:black;">#{show_html}#</p>')
f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
else:
f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next
@ -448,8 +77,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
return return_dict
class LatexPaperSplit():
"""
break down latex file to a linked list,
@ -464,18 +91,32 @@ class LatexPaperSplit():
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
def merge_result(self, arr, mode, msg):
def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
"""
Merge the result after the GPT process completed
"""
result_string = ""
p = 0
node_cnt = 0
line_cnt = 0
for node in self.nodes:
if node.preserve:
line_cnt += node.string.count('\n')
result_string += node.string
else:
result_string += fix_content(arr[p], node.string)
p += 1
translated_txt = fix_content(arr[node_cnt], node.string)
begin_line = line_cnt
end_line = line_cnt + translated_txt.count('\n')
# reverse translation if any error
if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
translated_txt = node.string
result_string += translated_txt
node_cnt += 1
line_cnt += translated_txt.count('\n')
if mode == 'translate_zh':
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(result_string)
@ -490,6 +131,7 @@ class LatexPaperSplit():
result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
return result_string
def split(self, txt, project_folder, opts):
"""
break down latex file to a linked list,
@ -511,7 +153,6 @@ class LatexPaperSplit():
return self.sp
class LatexPaperFileGroup():
"""
use tokenizer to break down text according to max_token_limit
@ -539,7 +180,7 @@ class LatexPaperFileGroup():
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
@ -560,41 +201,14 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from .crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
# <-------- 寻找主tex文件 ---------->
maintex = 寻找Latex主文件(file_manifest, mode)
maintex = find_main_tex_file(file_manifest, mode)
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
time.sleep(3)
@ -668,54 +282,51 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
final_tex = lps.merge_result(pfg.file_result, mode, msg)
objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
# <-------- 整理结果, 退出 ---------->
chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 返回 ---------->
return project_folder + f'/merge_{mode}.tex'
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]):
try:
with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
log = f.read()
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
file_lines = f.readlines()
import re
buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
buggy_lines = [int(l) for l in buggy_lines]
buggy_lines = sorted(buggy_lines)
print("removing lines that has errors", buggy_lines)
file_lines.pop(buggy_lines[0]-1)
buggy_line = buggy_lines[0]-1
print("reversing tex line that has errors", buggy_line)
# 重组,逆转出错的段落
if buggy_line not in fixed_line:
fixed_line.append(buggy_line)
lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)
with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
f.writelines(file_lines)
f.write(final_tex)
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
except:
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
import os, time
current_dir = os.getcwd()
n_fix = 1
fixed_line = []
max_try = 32
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}如果程序停顿5分钟以上请直接去该路径下取回翻译结果或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
@ -723,6 +334,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
while True:
import os
may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
shutil.copyfile(may_exist_bbl, target_bbl)
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
@ -756,7 +371,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
# <---------- 检查结果 ----------->
results_ = ""
original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
@ -773,9 +387,19 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
# 将两个PDF拼接
if original_pdf_success:
try:
from .latex_toolbox import merge_pdfs
concat_pdf = pj(work_folder_modified, f'comparison.pdf')
merge_pdfs(origin_pdf, result_pdf, concat_pdf)
promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
except Exception as e:
pass
return True # 成功啦
else:
if n_fix>=max_try: break
@ -787,6 +411,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
tex_name_pure=f'{main_file_modified}',
n_fix=n_fix,
work_folder_modified=work_folder_modified,
fixed_line=fixed_line
)
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break
@ -794,4 +419,29 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
return False # 失败啦
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from ..crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())

View File

@ -0,0 +1,456 @@
import os, shutil
import re
import numpy as np
PRESERVE = 0
TRANSFORM = 1
pj = os.path.join
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
self.range = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
def post_process(root):
# 修复括号
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# 屏蔽空行和太短的句子
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# 将前后断行符脱离
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# 标注节点的行数范围
node = root
n_line = 0
expansion = 2
while True:
n_l = node.string.count('\n')
node.range = [n_line-expansion, n_line+n_l+expansion] # 失败时,扭转的范围
n_line = n_line+n_l
node = node.next
if node is None: break
return root
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
Add it to preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex Merge File
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def find_main_tex_file(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个。
P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2 通过一些Latex模板中常见但通常不会出现在正文的单词对不同latex源文件扣分取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # 取评分最高者返回
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# 删除整行的空注释
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recrusively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recrusively
P.S. 顺便把CTEX塞进去以支持中文
P.S. 顺便把Latex的注释去除
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Post process
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def mod_inbraket(match):
"""
为啥chatgpt会把cite里面的逗号换成中文逗号呀
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace('', ':') # 前面是中文冒号,后面是英文冒号
str_to_modify = str_to_modify.replace('', ',') # 前面是中文逗号,后面是英文逗号
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# 出问题了,还原部分原文,保证括号正确
final_tex = join_most(final_tex, node_string)
return final_tex
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def merge_pdfs(pdf1_path, pdf2_path, output_path):
import PyPDF2
Percent = 0.8
# Open the first PDF file
with open(pdf1_path, 'rb') as pdf1_file:
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
# Open the second PDF file
with open(pdf2_path, 'rb') as pdf2_file:
pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
# Create a new PDF file to store the merged pages
output_writer = PyPDF2.PdfFileWriter()
# Determine the number of pages in each PDF file
num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
# Merge the pages from the two PDF files
for page_num in range(num_pages):
# Add the page from the first PDF file
if page_num < pdf1_reader.numPages:
page1 = pdf1_reader.getPage(page_num)
else:
page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Add the page from the second PDF file
if page_num < pdf2_reader.numPages:
page2 = pdf2_reader.getPage(page_num)
else:
page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Create a new empty page with double width
new_page = PyPDF2.PageObject.createBlankPage(
width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
)
new_page.mergeTranslatedPage(page1, 0, 0)
new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
output_writer.addPage(new_page)
# Save the merged PDF file
with open(output_path, 'wb') as output_file:
output_writer.write(output_file)

View File

@ -19,7 +19,7 @@ class AliyunASR():
pass
def test_on_error(self, message, *args):
# print("on_error args=>{}".format(args))
print("on_error args=>{}".format(args))
pass
def test_on_close(self, *args):
@ -50,6 +50,8 @@ class AliyunASR():
rad.clean_up()
temp_folder = tempfile.gettempdir()
TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
if len(TOKEN) == 0:
TOKEN = self.get_token()
self.aliyun_service_ok = True
URL="wss://nls-gateway.aliyuncs.com/ws/v1"
sr = nls.NlsSpeechTranscriber(
@ -91,3 +93,38 @@ class AliyunASR():
self.stop = True
self.stop_msg = 'Aliyun音频服务异常请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
r = sr.stop()
def get_token(self):
from toolbox import get_conf
import json
from aliyunsdkcore.request import CommonRequest
from aliyunsdkcore.client import AcsClient
AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')
# 创建AcsClient实例
client = AcsClient(
AccessKey_ID,
AccessKey_secret,
"cn-shanghai"
)
# 创建request并设置参数。
request = CommonRequest()
request.set_method('POST')
request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
request.set_version('2019-02-28')
request.set_action_name('CreateToken')
try:
response = client.do_action_with_exception(request)
print(response)
jss = json.loads(response)
if 'Token' in jss and 'Id' in jss['Token']:
token = jss['Token']['Id']
expireTime = jss['Token']['ExpireTime']
print("token = " + token)
print("expireTime = " + str(expireTime))
except Exception as e:
print(e)
return token

View File

@ -179,12 +179,12 @@ def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
import nls
from scipy import io
except:
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade aliyun-python-sdk-core==2.13.3 pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
if TOKEN == "" or APPKEY == "":
APPKEY = get_conf('ALIYUN_APPKEY')
if APPKEY == "":
chatbot.append(["导入依赖失败", "没有阿里云语音识别APPKEY和TOKEN, 详情见https://help.aliyun.com/document_detail/450255.html"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return

View File

@ -115,3 +115,36 @@ services:
command: >
bash -c "python3 -u main.py"
## ===================================================
## 【方案五】 ChatGPT + 语音助手 (请先阅读 docs/use_audio.md
## ===================================================
version: '3'
services:
gpt_academic_with_audio:
image: ghcr.io/binary-husky/gpt_academic_audio_assistant:master
environment:
# 请查阅 `config.py` 以查看所有的配置信息
API_KEY: ' fk195831-IdP0Pb3W6DCMUIbQwVX6MsSiyxwqybyS '
USE_PROXY: ' False '
proxies: ' None '
LLM_MODEL: ' gpt-3.5-turbo '
AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "gpt-4"] '
ENABLE_AUDIO: ' True '
LOCAL_MODEL_DEVICE: ' cuda '
DEFAULT_WORKER_NUM: ' 20 '
WEB_PORT: ' 12343 '
ADD_WAIFU: ' True '
THEME: ' Chuanhu-Small-and-Beautiful '
ALIYUN_APPKEY: ' RoP1ZrM84DnAFkZK '
ALIYUN_TOKEN: ' f37f30e0f9934c34a992f6f64f7eba4f '
# (无需填写) ALIYUN_ACCESSKEY: ' LTAI5q6BrFUzoRXVGUWnekh1 '
# (无需填写) ALIYUN_SECRET: ' eHmI20AVWIaQZ0CiTD2bGQVsaP9i68 '
# 与宿主的网络融合
network_mode: "host"
# 不使用代理网络拉取最新代码
command: >
bash -c "python3 -u main.py"

View File

@ -0,0 +1,22 @@
# 此Dockerfile适用于“无本地模型”的环境构建如果需要使用chatglm等本地模型请参考 docs/Dockerfile+ChatGLM
# 如何构建: 先修改 `config.py` 然后 docker build -t gpt-academic-nolocal -f docs/Dockerfile+NoLocal .
# 如何运行: docker run --rm -it --net=host gpt-academic-nolocal
FROM python:3.11
# 指定路径
WORKDIR /gpt
# 装载项目文件
COPY . .
# 安装依赖
RUN pip3 install -r requirements.txt
# 安装语音插件的额外依赖
RUN pip3 install pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
# 可选步骤,用于预热模块
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
# 启动
CMD ["python3", "-u", "main.py"]

View File

@ -28,6 +28,16 @@ ALIYUN_APPKEY = "RoPlZrM88DnAFkZK" # 此appkey已经失效
参考 https://help.aliyun.com/document_detail/450255.html
先有阿里云开发者账号,登录之后,需要开通 智能语音交互 的功能可以免费获得一个token然后在 全部项目 中创建一个项目可以获得一个appkey.
- 进阶功能
进一步填写ALIYUN_ACCESSKEY和ALIYUN_SECRET实现自动获取ALIYUN_TOKEN
```
ALIYUN_APPKEY = "RoP1ZrM84DnAFkZK"
ALIYUN_TOKEN = ""
ALIYUN_ACCESSKEY = "LTAI5q6BrFUzoRXVGUWnekh1"
ALIYUN_SECRET = "eHmI20AVWIaQZ0CiTD2bGQVsaP9i68"
```
## 3.启动
启动gpt-academic `python main.py`
@ -48,7 +58,7 @@ III `[把特殊软件如腾讯会议的外放声音用VoiceMeeter截留]`
VI 两种音频监听模式切换时,需要刷新页面才有效。
VII 非localhost运行+非https情况下无法打开录音功能的坑https://blog.csdn.net/weixin_39461487/article/details/109594434
## 5.点击函数插件区“实时音频采集” 或者其他音频交互功能

View File

@ -37,15 +37,23 @@ class GetGLMHandle(Process):
# 子进程执行
# 第一次运行,加载参数
retry = 0
LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')
if LOCAL_MODEL_QUANT == "INT4": # INT4
_model_name_ = "THUDM/chatglm2-6b-int4"
elif LOCAL_MODEL_QUANT == "INT8": # INT8
_model_name_ = "THUDM/chatglm2-6b-int8"
else:
_model_name_ = "THUDM/chatglm2-6b" # FP16
while True:
try:
if self.chatglm_model is None:
self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
device, = get_conf('LOCAL_MODEL_DEVICE')
self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
if device=='cpu':
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
else:
self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
self.chatglm_model = self.chatglm_model.eval()
break
else:
@ -136,11 +144,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -185,11 +185,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -129,11 +129,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
raw_input = inputs
logging.info(f'[raw_input] {raw_input}')

View File

@ -116,11 +116,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
raw_input = inputs
logging.info(f'[raw_input] {raw_input}')

View File

@ -290,11 +290,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -154,11 +154,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -154,11 +154,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -154,11 +154,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -224,11 +224,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
yield from update_ui(chatbot=chatbot, history=history)
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
# 处理历史信息
history_feedin = []

View File

@ -224,11 +224,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
history_feedin = []
for i in range(len(history)//2):

View File

@ -248,14 +248,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]:
inputs = core_functional[additional_fn]["PreProcess"](
inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + \
inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
history_feedin = []
for i in range(len(history)//2):

View File

@ -96,11 +96,8 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
additional_fn代表点击的哪个按钮按钮见functional.py
"""
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
from core_functional import handle_core_functionality
inputs, history = handle_core_functionality(additional_fn, inputs, history)
raw_input = "What I would like to say is the following: " + inputs
history.extend([inputs, ""])

View File

@ -519,7 +519,11 @@ class _ChatHub:
resp_txt_no_link = ""
while not final:
msg = await self.wss.receive()
objects = msg.data.split(DELIMITER)
try:
objects = msg.data.split(DELIMITER)
except :
continue
for obj in objects:
if obj is None or not obj:
continue

View File

@ -18,3 +18,4 @@ openai
numpy
arxiv
rich
pypdf2==2.12.1

50
setup.py Normal file
View File

@ -0,0 +1,50 @@
import setuptools, glob, os, fnmatch
with open("README.md", "r", encoding="utf-8") as fh:
long_description = fh.read()
def _process_requirements():
packages = open('requirements.txt').read().strip().split('\n')
requires = []
for pkg in packages:
if pkg.startswith('git+ssh'):
return_code = os.system('pip install {}'.format(pkg))
assert return_code == 0, 'error, status_code is: {}, exit!'.format(return_code)
if pkg.startswith('./docs'):
continue
else:
requires.append(pkg)
return requires
def package_files(directory):
import subprocess
list_of_files = subprocess.check_output("git ls-files", shell=True).splitlines()
return [str(k) for k in list_of_files]
extra_files = package_files('./')
setuptools.setup(
name="void-terminal",
version="0.0.0",
author="Qingxu",
author_email="505030475@qq.com",
description="LLM based APIs",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/binary-husky/gpt-academic",
project_urls={
"Bug Tracker": "https://github.com/binary-husky/gpt-academic/issues",
},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
],
package_dir={"": "."},
package_data={"": extra_files},
include_package_data=True,
packages=setuptools.find_packages(where="."),
python_requires=">=3.9",
install_requires=_process_requirements(),
)

View File

@ -538,7 +538,11 @@ def load_chat_cookies():
return {'api_key': API_KEY, 'llm_model': LLM_MODEL}
def is_openai_api_key(key):
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
if len(CUSTOM_API_KEY_PATTERN) != 0:
API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
else:
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
return bool(API_MATCH_ORIGINAL)
def is_azure_api_key(key):
@ -594,7 +598,7 @@ def select_api_key(keys, llm_model):
if is_azure_api_key(k): avail_key_list.append(k)
if len(avail_key_list) == 0:
raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azureapi2d请求源")
raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azure,claude,api2d请求源)")
api_key = random.choice(avail_key_list) # 随机负载均衡
return api_key
@ -670,13 +674,14 @@ def read_single_conf_with_lru_cache(arg):
# 在读取API_KEY时检查一下是不是忘了改config
if arg == 'API_KEY':
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和API2D的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,api2d-key3\"")
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s)也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
if is_any_api_key(r):
print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
else:
print亮红( "[API_KEY] 正确的 API_KEY 'sk'开头的51位密钥OpenAI或者 'fk'开头的41位密钥请在config文件中修改API密钥之后再运行。")
print亮红( "[API_KEY] 的 API_KEY 不满足任何一种已知的密钥格式请在config文件中修改API密钥之后再运行。")
if arg == 'proxies':
if not read_single_conf_with_lru_cache('USE_PROXY'): r = None # 检查USE_PROXY防止proxies单独起作用
if r is None:
print亮红('[PROXY] 网络代理状态未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议检查USE_PROXY选项是否修改。')
else:
@ -685,6 +690,7 @@ def read_single_conf_with_lru_cache(arg):
return r
@lru_cache(maxsize=128)
def get_conf(*args):
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
res = []

View File

@ -1,5 +1,5 @@
{
"version": 3.46,
"version": 3.47,
"show_feature": true,
"new_feature": "临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件"
}
"new_feature": "优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持"
}