如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处
@@ -160,7 +162,7 @@ docker run --rm -it --net=host gpt-academic
#(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
```
-P.S. 如果需要Latex功能,请见另一个[Dockerfile](https://github.com/binary-husky/gpt_academic/blob/master/docs/Dockerfile%2BNoLocal%2BLatex)
+P.S. 如果需要依赖Latex的插件功能,请见Wiki
2. ChatGPT + ChatGLM + MOSS(需要熟悉Docker)
From 29c6bfb6cb08f58a0e5fba8540ef56cf36277cf6 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 16:12:06 +0800
Subject: [PATCH 11/46] Update README.md
---
README.md | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 182a49b..6e461c2 100644
--- a/README.md
+++ b/README.md
@@ -113,11 +113,16 @@ conda activate gptac_venv # 激活anaconda环境
python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
```
-P.S. 如果需要依赖Latex的插件功能,请见Wiki
-
如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处
+
+如果需要依赖Latex的插件功能(如Arxiv文献翻译),请点击展开此处
+
+ 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89)
+
+
+
【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强):
```sh
# 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
From a06e43c96b9f6c199b0d440d5db2e7247224a18b Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Sun, 18 Jun 2023 16:15:37 +0800
Subject: [PATCH 12/46] Update README.md
---
README.md | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/README.md b/README.md
index 6e461c2..c69bfb5 100644
--- a/README.md
+++ b/README.md
@@ -113,16 +113,10 @@ conda activate gptac_venv # 激活anaconda环境
python -m pip install -r requirements.txt # 这个步骤和pip安装一样的步骤
```
+
如果需要支持清华ChatGLM/复旦MOSS作为后端,请点击展开此处
-
-如果需要依赖Latex的插件功能(如Arxiv文献翻译),请点击展开此处
-
- 请见[Wiki](https://github.com/binary-husky/gpt_academic/wiki/%E5%A6%82%E4%BD%95%E4%BD%BF%E7%94%A8Latex%E7%9B%B8%E5%85%B3%E6%8F%92%E4%BB%B6%EF%BC%88arxiv%E6%96%87%E7%AB%A0%E7%BF%BB%E8%AF%91%EF%BC%89)
-
-
-
【可选步骤】如果需要支持清华ChatGLM/复旦MOSS作为后端,需要额外安装更多依赖(前提条件:熟悉Python + 用过Pytorch + 电脑配置够强):
```sh
# 【可选步骤I】支持清华ChatGLM。清华ChatGLM备注:如果遇到"Call ChatGLM fail 不能正常加载ChatGLM的参数" 错误,参考如下: 1:以上默认安装的为torch+cpu版,使用cuda需要卸载torch重新安装torch+cuda; 2:如因本机配置不够无法加载模型,可以修改request_llm/bridge_chatglm.py中的模型精度, 将 AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True) 都修改为 AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
From d5bab093f94523665c5b0a6b7781dd491123faff Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Mon, 19 Jun 2023 15:17:33 +1000
Subject: [PATCH 13/46] rename function names
---
crazy_functions/latex_utils.py | 163 ++++++++++++++-------------------
1 file changed, 69 insertions(+), 94 deletions(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index eebce80..a984b2f 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -8,85 +8,65 @@ pj = os.path.join
"""
========================================================================
Part One
-Latex segmentation to a linklist
+Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1
-def split_worker(text, mask, pattern, flags=0):
+def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
+    e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
+    you can mask out (set mask = PRESERVE so that the text becomes untouchable for GPT)
+    everything between "\begin{algorithm}" and "\end{algorithm}"
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
-def set_transform_area(text, mask, pattern, flags=0):
+def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
- Add a transform text area in this paper
+    Add a preserve text area in this paper (the text becomes untouchable for GPT).
+    Count the number of braces so as to capture the complete text area, e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
- mask[res.span()[0] : res.span()[1]] = TRANSFORM
+ brace_level = -1
+ p = begin = end = res.regs[0][0]
+ for _ in range(1024*16):
+ if text[p] == '}' and brace_level == 0: break
+ elif text[p] == '}': brace_level -= 1
+ elif text[p] == '{': brace_level += 1
+ p += 1
+ end = p+1
+ mask[begin:end] = PRESERVE
return text, mask
-
-def split_worker_careful_brace(text, mask, pattern, flags=0):
+def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
- Move area into preserve area.
- It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
+    Move an area out of the preserve area (make the text editable for GPT).
+    Count the number of braces so as to capture the complete text area, e.g.
+    \caption{blablablablabla\textbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
- res = pattern_compile.search(text)
-
- # 确保捕获组存在
- if res and len(res.regs) > 1:
+ for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
- for _ in range(1024 * 16):
- if text[p] == "}" and brace_level == 1:
- break
- elif text[p] == "}":
- brace_level -= 1
- elif text[p] == "{":
- brace_level += 1
+ for _ in range(1024*16):
+ if text[p] == '}' and brace_level == 0: break
+ elif text[p] == '}': brace_level -= 1
+ elif text[p] == '{': brace_level += 1
p += 1
end = p
- mask[begin + 1 : end] = PRESERVE
- split_worker_careful_brace(text[end:], mask[end:], pattern, flags=flags)
-
+ mask[begin:end] = TRANSFORM
return text, mask
-
-def split_worker_reverse_careful_brace(text, mask, pattern, flags=0):
- """
- Move area out of preserve area.
- It is better to wrap the curly braces in the capture group, e.g., r"\\captioin(\{.*\})".
- """
- pattern_compile = re.compile(pattern, flags)
- res = pattern_compile.search(text)
-
- # 确保捕获组存在
- if res and len(res.regs) > 1:
- brace_level = 0
- p = begin = end = res.regs[1][0]
- for _ in range(1024 * 16):
- if text[p] == "}" and brace_level == 1:
- break
- elif text[p] == "}":
- brace_level -= 1
- elif text[p] == "{":
- brace_level += 1
- p += 1
- end = p
- mask[begin + 1 : end] = TRANSFORM
- split_worker_reverse_careful_brace(text[end:], mask[end:], pattern, flags=flags)
-
- return text, mask
-
-def split_worker_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
+def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
    Find all \begin{} ... \end{} text blocks with fewer than limit_n_lines lines.
    Add them to the preserve area.
@@ -289,53 +269,48 @@ def split_subprocess(txt, project_folder, return_dict, opts):
mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
# 吸收title与作者以上的部分
- text, mask = split_worker(text, mask, r".*?\\begin\{document\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
# 删除iffalse注释
- text, mask = split_worker(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
# 吸收在25行以内的begin-end组合
- text, mask = split_worker_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=25)
+ text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
# 吸收匿名公式
- text, mask = split_worker(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\\[.*?\\\]", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\$\$(.*?)\$\$", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\\[.*?\\\]", re.DOTALL)
# 吸收其他杂项
- text, mask = split_worker(text, mask, r"\\section\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\section\*\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\subsection\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\subsubsection\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\bibliography\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\bibliographystyle\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
- text, mask = split_worker(text, mask, r"\\item ")
- text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\vspace\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\hspace\{(.*?)\}")
-
- text, mask = set_transform_area(text, mask, r"\\begin\{abstract\}.*?\\end\{abstract\}", re.DOTALL)
-
- text, mask = split_worker_careful_brace(text, mask, r"\\hl(\{.*\})", re.DOTALL)
- text, mask = split_worker_reverse_careful_brace(text, mask, r"\\caption(\{.*\})", re.DOTALL)
- text, mask = split_worker_reverse_careful_brace(text, mask, r"\\abstract(\{.*\})", re.DOTALL)
-
- text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
- text, mask = split_worker(text, mask, r"\\end\{(.*?)\}")
-
+ text, mask = set_forbidden_text(text, mask, r"\\section\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\section\*\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\subsection\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\subsubsection\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\bibliography\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\bibliographystyle\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\}(.*?)\\end\{figure\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\}(.*?)\\end\{multline\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{table\}(.*?)\\end\{table\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{table\*\}(.*?)\\end\{table\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\}(.*?)\\end\{minipage\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{align\*\}(.*?)\\end\{align\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+ text, mask = set_forbidden_text(text, mask, r"\\item ")
+ text, mask = set_forbidden_text(text, mask, r"\\label\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\begin\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\vspace\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\hspace\{(.*?)\}")
+ text, mask = set_forbidden_text(text, mask, r"\\end\{(.*?)\}")
+ text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
+ # reverse 操作必须放在最后
+ text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL)
root = convert_to_linklist(text, mask)
# 修复括号
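The rename in this patch (split_worker → set_forbidden_text and friends) does not change the underlying mechanism: a per-character numpy mask decides which spans GPT may rewrite. Below is a minimal, self-contained sketch of that mechanism; `demo_set_forbidden_text` and the splitting loop are illustrative stand-ins, not the project's actual functions.

```python
import re
import numpy as np

PRESERVE = 0   # span is frozen: never sent to GPT
TRANSFORM = 1  # span may be rewritten

def demo_set_forbidden_text(text, mask, pattern, flags=0):
    # mark every regex match as PRESERVE so it stays untouched
    for res in re.finditer(pattern, text, flags):
        mask[res.span()[0]:res.span()[1]] = PRESERVE
    return text, mask

text = r"Some intro text. \begin{equation} E = mc^2 \end{equation} More prose."
mask = np.full(len(text), TRANSFORM, dtype=np.uint8)
text, mask = demo_set_forbidden_text(
    text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)

# split into alternating editable / frozen segments, which is conceptually
# what convert_to_linklist does before fragments are handed to the LLM
segments, start = [], 0
for i in range(1, len(text) + 1):
    if i == len(text) or mask[i] != mask[start]:
        label = "PRESERVE" if mask[start] == PRESERVE else "TRANSFORM"
        segments.append((label, text[start:i]))
        start = i
print(segments)
```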
From af7734dd35c62de6f85a18b00c3598527b85cfb4 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Mon, 19 Jun 2023 16:57:11 +1000
Subject: [PATCH 14/46] avoid file fusion
---
crazy_functions/latex_utils.py | 2 +-
main.py | 4 ++--
toolbox.py | 14 ++++++++++----
3 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 78eec29..163d0e2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -685,7 +685,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
- promote_file_to_downloadzone(result_pdf)
+ promote_file_to_downloadzone(result_pdf, chatbot)
return True # 成功啦
else:
if n_fix>=max_try: break
diff --git a/main.py b/main.py
index 7dbf17f..65e1f4c 100644
--- a/main.py
+++ b/main.py
@@ -155,7 +155,7 @@ def main():
for k in crazy_fns:
if not crazy_fns[k].get("AsButton", True): continue
click_handle = crazy_fns[k]["Button"].click(ArgsGeneralWrapper(crazy_fns[k]["Function"]), [*input_combo, gr.State(PORT)], output_combo)
- click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+ click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
cancel_handles.append(click_handle)
# 函数插件-下拉菜单与随变按钮的互动
def on_dropdown_changed(k):
@@ -175,7 +175,7 @@ def main():
if k in [r"打开插件列表", r"请先从插件列表中选择"]: return
yield from ArgsGeneralWrapper(crazy_fns[k]["Function"])(*args, **kwargs)
click_handle = switchy_bt.click(route,[switchy_bt, *input_combo, gr.State(PORT)], output_combo)
- click_handle.then(on_report_generated, [file_upload, chatbot], [file_upload, chatbot])
+ click_handle.then(on_report_generated, [cookies, file_upload, chatbot], [cookies, file_upload, chatbot])
cancel_handles.append(click_handle)
# 终止按钮的回调函数注册
stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
diff --git a/toolbox.py b/toolbox.py
index 4ab1116..ac49afc 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -439,13 +439,15 @@ def find_recent_files(directory):
return recent_files
-def promote_file_to_downloadzone(file, rename_file=None):
+def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区
import shutil
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = os.path.join(f'./gpt_log/', rename_file)
if os.path.exists(new_path): os.remove(new_path)
shutil.copyfile(file, new_path)
+ if chatbot:
+ chatbot._cookies.update({'file_to_promote': [new_path]})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
"""
@@ -485,16 +487,20 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
return chatbot, txt, txt2
-def on_report_generated(files, chatbot):
+def on_report_generated(cookies, files, chatbot):
from toolbox import find_recent_files
- report_files = find_recent_files('gpt_log')
+ if 'file_to_promote' in cookies:
+ report_files = cookies['file_to_promote']
+ cookies.pop('file_to_promote')
+ else:
+ report_files = find_recent_files('gpt_log')
if len(report_files) == 0:
return None, chatbot
# files.extend(report_files)
file_links = ''
    for f in report_files: file_links += f'<br/>{f}'
chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
- return report_files, chatbot
+ return cookies, report_files, chatbot
def is_openai_api_key(key):
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
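For context, this patch replaces the "scan gpt_log for recent files" heuristic with an explicit hand-off through the chatbot cookies, which is what prevents unrelated files from being fused into one report. A simplified, runnable sketch of that flow (names and signatures reduced for illustration, not the real toolbox.py):

```python
# Simplified stand-ins for promote_file_to_downloadzone / on_report_generated:
def promote_file_sketch(cookies, new_path):
    # the plugin records exactly which file it produced
    cookies['file_to_promote'] = [new_path]

def on_report_generated_sketch(cookies):
    if 'file_to_promote' in cookies:
        # consume the recorded files instead of guessing from recent files
        return cookies.pop('file_to_promote')
    # fallback: this is where find_recent_files('gpt_log') used to be called
    return []

cookies = {}
promote_file_sketch(cookies, './gpt_log/translate_zh.pdf')
print(on_report_generated_sketch(cookies))  # ['./gpt_log/translate_zh.pdf']
print(on_report_generated_sketch(cookies))  # [] -- the record is consumed once
```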
From f3e4e26e2f095e1f0d3b5faeaec23fde2b0b33a0 Mon Sep 17 00:00:00 2001
From: dackdawn
Date: Mon, 19 Jun 2023 21:40:26 +0800
Subject: [PATCH 15/46] =?UTF-8?q?=E6=B7=BB=E5=8A=A00613=E6=A8=A1=E5=9E=8B?=
=?UTF-8?q?=E7=9A=84=E5=A3=B0=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
openai对gpt-3.5-turbo的RPM限制是3,而gpt-3.5-turbo-0613的RPM是60,虽然两个模型的内容是一致的,但是选定特定模型可以获得更高的RPM和TPM
---
config.py | 2 +-
request_llm/bridge_all.py | 18 ++++++++++++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/config.py b/config.py
index 87e0ec9..917c268 100644
--- a/config.py
+++ b/config.py
@@ -46,7 +46,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-0613", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
# P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index a27407c..22fa04b 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -93,6 +93,24 @@ model_info = {
"token_cnt": get_token_num_gpt35,
},
+ "gpt-3.5-turbo-0613": {
+ "fn_with_ui": chatgpt_ui,
+ "fn_without_ui": chatgpt_noui,
+ "endpoint": openai_endpoint,
+ "max_token": 4096,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+
+ "gpt-3.5-turbo-16k-0613": {
+ "fn_with_ui": chatgpt_ui,
+ "fn_without_ui": chatgpt_noui,
+ "endpoint": openai_endpoint,
+ "max_token": 1024 * 16,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+
"gpt-4": {
"fn_with_ui": chatgpt_ui,
"fn_without_ui": chatgpt_noui,
From 5da633d94dfa13c7658956537bc7c6c0d37e8a73 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lebenito=EF=BC=88=E7=94=9F=E7=B3=B8=EF=BC=89?=
Date: Tue, 20 Jun 2023 19:10:11 +0800
Subject: [PATCH 16/46] Update README.md
Fix the error URL for the git clone.
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index c69bfb5..7976076 100644
--- a/README.md
+++ b/README.md
@@ -91,7 +91,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
1. 下载项目
```sh
-git clone https://github.com/binary-husky/.git
+git clone https://github.com/binary-husky/gpt_academic.git
cd gpt_academic
```
From 61eb0da861526ccee760caba86ffca387d9af358 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Tue, 20 Jun 2023 22:08:09 +1000
Subject: [PATCH 17/46] fix encoding bug
---
crazy_functions/latex_utils.py | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 163d0e2..308044f 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -175,9 +175,8 @@ def merge_tex_files(project_foler, main_file, mode):
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
- if platform.system() != 'Windows':
- main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows]{\2}",main_file)
- main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows]{\1}",main_file)
+ main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
+ main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(main_file)
From bf955aaf12e94674877ca61d02d197547ae05cee Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Tue, 20 Jun 2023 23:12:30 +1000
Subject: [PATCH 18/46] fix bugs
---
crazy_functional.py | 45 +++++++++++++++++++----------
crazy_functions/Latex输出PDF结果.py | 23 ++++++++++-----
crazy_functions/latex_utils.py | 24 ++++++++-------
crazy_functions/对话历史存档.py | 7 ++---
toolbox.py | 4 +--
5 files changed, 63 insertions(+), 40 deletions(-)
diff --git a/crazy_functional.py b/crazy_functional.py
index 2f0fbaa..6ad2dc8 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -358,29 +358,42 @@ def get_crazy_functions():
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
- "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
+ "Arixv翻译(输入arxivID) [需Latex]": {
"Color": "stop",
"AsButton": False,
- # "AdvancedArgs": True,
- # "ArgsReminder": "",
+ "AdvancedArgs": True,
+ "ArgsReminder":
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+ "Function": HotReload(Latex翻译中文并重新编译PDF)
+ }
+ })
+ function_plugins.update({
+ "本地论文翻译(上传Latex压缩包) [需Latex]": {
+ "Color": "stop",
+ "AsButton": False,
+ "AdvancedArgs": True,
+ "ArgsReminder":
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
except:
print('Load function plugin failed')
- try:
- from crazy_functions.虚空终端 import 终端
- function_plugins.update({
- "超级终端": {
- "Color": "stop",
- "AsButton": False,
- # "AdvancedArgs": True,
- # "ArgsReminder": "",
- "Function": HotReload(终端)
- }
- })
- except:
- print('Load function plugin failed')
+ # try:
+ # from crazy_functions.虚空终端 import 终端
+ # function_plugins.update({
+ # "超级终端": {
+ # "Color": "stop",
+ # "AsButton": False,
+ # # "AdvancedArgs": True,
+ # # "ArgsReminder": "",
+ # "Function": HotReload(终端)
+ # }
+ # })
+ # except:
+ # print('Load function plugin failed')
return function_plugins
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 6c89751..214b00a 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -1,12 +1,13 @@
from toolbox import update_ui, trimmed_format_exc, get_conf, objdump, objload, promote_file_to_downloadzone
from toolbox import CatchException, report_execption, update_ui_lastest_msg, zip_result, gen_time_str
+from functools import partial
import glob, os, requests, time
pj = os.path.join
ARXIV_CACHE_DIR = os.path.expanduser(f"~/arxiv_cache/")
# =================================== 工具函数 ===============================================
专业词汇声明 = 'If the term "agent" is used in this section, it should be translated to "智能体". '
-def switch_prompt(pfg, mode):
+def switch_prompt(pfg, mode, more_requirement):
"""
Generate prompts and system prompts based on the mode for proofreading or translating.
Args:
@@ -25,7 +26,7 @@ def switch_prompt(pfg, mode):
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
elif mode == 'translate_zh':
- inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + 专业词汇声明 +
+ inputs_array = [r"Below is a section from an English academic paper, translate it into Chinese. " + more_requirement +
r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
r"Answer me only with the translated text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
@@ -79,7 +80,7 @@ def arxiv_download(chatbot, history, txt):
os.makedirs(translation_dir)
target_file = pj(translation_dir, 'translate_zh.pdf')
if os.path.exists(target_file):
- promote_file_to_downloadzone(target_file)
+ promote_file_to_downloadzone(target_file, rename_file=None, chatbot=chatbot)
return target_file
return False
def is_float(s):
@@ -88,8 +89,10 @@ def arxiv_download(chatbot, history, txt):
return True
except ValueError:
return False
- if ('.' in txt) and ('/' not in txt) and is_float(txt):
+ if ('.' in txt) and ('/' not in txt) and is_float(txt): # is arxiv ID
txt = 'https://arxiv.org/abs/' + txt
+ if ('.' in txt) and ('/' not in txt) and is_float(txt[:10]): # is arxiv ID
+ txt = 'https://arxiv.org/abs/' + txt[:10]
if not txt.startswith('https://arxiv.org'):
return txt, None
@@ -177,7 +180,8 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_proofread.tex'):
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+ yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
+ chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
# <-------------- compile PDF ------------->
@@ -208,6 +212,10 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
"对整个Latex项目进行翻译, 生成中文PDF。函数插件贡献者: Binary-Husky。注意事项: 此插件Windows支持最佳,Linux下必须使用Docker安装,详见项目主README.md。目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
+ # <-------------- more requirements ------------->
+ if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+ more_req = plugin_kwargs.get("advanced_arg", "")
+ _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
@@ -255,11 +263,12 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
if not os.path.exists(project_folder + '/merge_translate_zh.tex'):
- yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='translate_zh', switch_prompt=switch_prompt)
+ yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
+ chatbot, history, system_prompt, mode='translate_zh', switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
- success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh',
+ success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_translate_zh', mode='translate_zh',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
# <-------------- zip PDF ------------->
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 308044f..58ac413 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -403,7 +403,7 @@ class LatexPaperSplit():
def __init__(self) -> None:
self.nodes = None
self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
- "版权归原文作者所有。翻译内容可靠性无任何保障,请仔细鉴别并以原文为准。" + \
+ "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
    # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加README中的QQ联系开发者)
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
@@ -623,7 +623,7 @@ def compile_latex_with_timeout(command, timeout=60):
return False
return True
-def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder):
+def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
import os, time
current_dir = os.getcwd()
n_fix = 1
@@ -634,6 +634,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
while True:
import os
+
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
@@ -655,15 +656,16 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
- print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
- ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+ if mode!='translate_zh':
+ yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
+ print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
+ ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
- yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
# <--------------------->
os.chdir(current_dir)
@@ -684,7 +686,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
- promote_file_to_downloadzone(result_pdf, chatbot)
+ promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
return True # 成功啦
else:
if n_fix>=max_try: break
diff --git a/crazy_functions/对话历史存档.py b/crazy_functions/对话历史存档.py
index c638d1b..fed0f8f 100644
--- a/crazy_functions/对话历史存档.py
+++ b/crazy_functions/对话历史存档.py
@@ -1,4 +1,4 @@
-from toolbox import CatchException, update_ui
+from toolbox import CatchException, update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
import re
@@ -29,9 +29,8 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
for h in history:
f.write("\n>>>" + h)
f.write('')
- res = '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
- print(res)
- return res
+ promote_file_to_downloadzone(f'./gpt_log/{file_name}', rename_file=file_name, chatbot=chatbot)
+ return '对话历史写入:' + os.path.abspath(f'./gpt_log/{file_name}')
def gen_file_preview(file_name):
try:
diff --git a/toolbox.py b/toolbox.py
index ac49afc..ff936d6 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -444,8 +444,8 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
import shutil
if rename_file is None: rename_file = f'{gen_time_str()}-{os.path.basename(file)}'
new_path = os.path.join(f'./gpt_log/', rename_file)
- if os.path.exists(new_path): os.remove(new_path)
- shutil.copyfile(file, new_path)
+ if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
+ if not os.path.exists(new_path): shutil.copyfile(file, new_path)
if chatbot:
chatbot._cookies.update({'file_to_promote': [new_path]})
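The key mechanism in this patch is that the user's optional "advanced_arg" is baked into the prompt builder with `functools.partial` before being handed to the generic pipeline. A reduced sketch of that pattern (the real `switch_prompt` operates on a `pfg` object and returns prompt arrays):

```python
from functools import partial

def switch_prompt_sketch(fragment, mode, more_requirement):
    # the extra requirement from the "advanced args" box is appended to the prompt
    if mode == 'translate_zh':
        return ("Below is a section from an English academic paper, "
                "translate it into Chinese. " + more_requirement +
                "Answer me only with the translated text:\n\n" + fragment)
    return fragment

plugin_kwargs = {"advanced_arg": 'If the term "agent" is used in this section, it should be translated to "智能体". '}
if plugin_kwargs.get("advanced_arg", "") == "": plugin_kwargs.pop("advanced_arg", None)
more_req = plugin_kwargs.get("advanced_arg", "")
_switch_prompt_ = partial(switch_prompt_sketch, more_requirement=more_req)

print(_switch_prompt_("The agent explores the environment.", mode='translate_zh'))
```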
From cb0bb6ab4a9b458118435220086bb60cea238416 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 00:41:33 +1000
Subject: [PATCH 19/46] fix minor bugs
---
crazy_functional.py | 22 ++++++-------
crazy_functions/Latex输出PDF结果.py | 7 +++--
crazy_functions/crazy_utils.py | 48 +++++++++++++++++++++++++++++
crazy_functions/latex_utils.py | 26 ++++++++++++++++
toolbox.py | 12 +++++---
5 files changed, 98 insertions(+), 17 deletions(-)
diff --git a/crazy_functional.py b/crazy_functional.py
index 6ad2dc8..ded0698 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -368,17 +368,17 @@ def get_crazy_functions():
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
- function_plugins.update({
- "本地论文翻译(上传Latex压缩包) [需Latex]": {
- "Color": "stop",
- "AsButton": False,
- "AdvancedArgs": True,
- "ArgsReminder":
- "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
- "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
- "Function": HotReload(Latex翻译中文并重新编译PDF)
- }
- })
+ # function_plugins.update({
+ # "本地论文翻译(上传Latex压缩包) [需Latex]": {
+ # "Color": "stop",
+ # "AsButton": False,
+ # "AdvancedArgs": True,
+ # "ArgsReminder":
+ # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+ # "Function": HotReload(Latex翻译中文并重新编译PDF)
+ # }
+ # })
except:
print('Load function plugin failed')
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 214b00a..4f19967 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -108,6 +108,7 @@ def arxiv_download(chatbot, history, txt):
return msg, None
# <-------------- set format ------------->
arxiv_id = url_.split('/abs/')[-1]
+ if 'v' in arxiv_id: arxiv_id = arxiv_id[:10]
cached_translation_pdf = check_cached_translation_pdf(arxiv_id)
if cached_translation_pdf: return cached_translation_pdf, arxiv_id
@@ -190,13 +191,14 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- zip PDF ------------->
- zip_result(project_folder)
+ zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
return success
@@ -272,13 +274,14 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
# <-------------- zip PDF ------------->
- zip_result(project_folder)
+ zip_res = zip_result(project_folder)
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
# <-------------- we are done ------------->
diff --git a/crazy_functions/crazy_utils.py b/crazy_functions/crazy_utils.py
index 96301ff..a1b1493 100644
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@@ -698,3 +698,51 @@ def try_install_deps(deps):
for dep in deps:
import subprocess, sys
subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--user', dep])
+
+
+class construct_html():
+ def __init__(self) -> None:
+ self.css = """
+.row {
+ display: flex;
+ flex-wrap: wrap;
+}
+
+.column {
+ flex: 1;
+ padding: 10px;
+}
+
+.table-header {
+ font-weight: bold;
+ border-bottom: 1px solid black;
+}
+
+.table-row {
+ border-bottom: 1px solid lightgray;
+}
+
+.table-cell {
+ padding: 5px;
+}
+ """
+        self.html_string = f'<!DOCTYPE html><head><meta charset="utf-8"><title>翻译结果</title><style>{self.css}</style></head>'
+
+
+ def add_row(self, a, b):
+ tmp = """
+
+
REPLACE_A
+
REPLACE_B
+
+ """
+ from toolbox import markdown_convertion
+ tmp = tmp.replace('REPLACE_A', markdown_convertion(a))
+ tmp = tmp.replace('REPLACE_B', markdown_convertion(b))
+ self.html_string += tmp
+
+
+ def save_file(self, file_name):
+ with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
+ f.write(self.html_string.encode('utf-8', 'ignore').decode())
+
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 58ac413..a1e7758 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -497,7 +497,32 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
+def write_html(sp_file_contents, sp_file_result, chatbot):
+ # write html
+ try:
+ import copy
+ from .crazy_utils import construct_html
+ from toolbox import gen_time_str
+ ch = construct_html()
+ orig = ""
+ trans = ""
+ final = []
+ for c,r in zip(sp_file_contents, sp_file_result):
+ final.append(c)
+ final.append(r)
+ for i, k in enumerate(final):
+ if i%2==0:
+ orig = k
+ if i%2==1:
+ trans = k
+ ch.add_row(a=orig, b=trans)
+ create_report_file_name = f"{gen_time_str()}.trans.html"
+ ch.save_file(create_report_file_name)
+ promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
+ except:
+ from toolbox import trimmed_format_exc
+ print('writing html result failed:', trimmed_format_exc())
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
import time, os, re
@@ -574,6 +599,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl'))
+ write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
diff --git a/toolbox.py b/toolbox.py
index ff936d6..fb6aa9f 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -6,6 +6,7 @@ import re
import os
from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache
+pj = os.path.join
"""
========================================================================
@@ -399,7 +400,7 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted rar archive to {}".format(dest_dir))
except:
print("Rar format requires additional dependencies to install")
- return '\n\n需要安装pip install rarfile来解压rar文件'
+ return '\n\n解压失败! 需要安装pip install rarfile来解压rar文件'
# 第三方库,需要预先pip install py7zr
elif file_extension == '.7z':
@@ -410,7 +411,7 @@ def extract_archive(file_path, dest_dir):
print("Successfully extracted 7z archive to {}".format(dest_dir))
except:
print("7z format requires additional dependencies to install")
- return '\n\n需要安装pip install py7zr来解压7z文件'
+ return '\n\n解压失败! 需要安装pip install py7zr来解压7z文件'
else:
return ''
return ''
@@ -447,7 +448,9 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
if os.path.exists(new_path) and not os.path.samefile(new_path, file): os.remove(new_path)
if not os.path.exists(new_path): shutil.copyfile(file, new_path)
if chatbot:
- chatbot._cookies.update({'file_to_promote': [new_path]})
+ if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote']
+ else: current = []
+ chatbot._cookies.update({'file_to_promote': [new_path] + current})
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
"""
@@ -802,7 +805,8 @@ def zip_result(folder):
import time
t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
zip_folder(folder, './gpt_log/', f'{t}-result.zip')
-
+ return pj('./gpt_log/', f'{t}-result.zip')
+
def gen_time_str():
import time
return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
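A usage sketch of the `construct_html` helper added in this patch, assuming the repository modules are importable; the input fragments and `chatbot=None` are placeholders for illustration:

```python
from crazy_functions.crazy_utils import construct_html
from toolbox import promote_file_to_downloadzone, gen_time_str

sp_file_contents = ["Original paragraph one.", "Original paragraph two."]  # placeholders
sp_file_result = ["翻译后的第一段。", "翻译后的第二段。"]                  # placeholders

ch = construct_html()
for orig, trans in zip(sp_file_contents, sp_file_result):
    ch.add_row(a=orig, b=trans)            # one original/translation pair per row
report_name = f"{gen_time_str()}.trans.html"
ch.save_file(report_name)                  # written under ./gpt_log/
promote_file_to_downloadzone(file=f'./gpt_log/{report_name}', chatbot=None)
```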
From d7b056576d51945808dcb99733ec7931aedad5be Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 00:52:58 +1000
Subject: [PATCH 20/46] add latex docker-compose
---
docs/GithubAction+NoLocal+Latex | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
create mode 100644 docs/GithubAction+NoLocal+Latex
diff --git a/docs/GithubAction+NoLocal+Latex b/docs/GithubAction+NoLocal+Latex
new file mode 100644
index 0000000..5ff9bb8
--- /dev/null
+++ b/docs/GithubAction+NoLocal+Latex
@@ -0,0 +1,25 @@
+# 此Dockerfile适用于“无本地模型”的环境构建,如果需要使用chatglm等本地模型,请参考 docs/Dockerfile+ChatGLM
+# - 1 修改 `config.py`
+# - 2 构建 docker build -t gpt-academic-nolocal-latex -f docs/Dockerfile+NoLocal+Latex .
+# - 3 运行 docker run -v /home/fuqingxu/arxiv_cache:/root/arxiv_cache --rm -it --net=host gpt-academic-nolocal-latex
+
+FROM fuqingxu/python311_texlive_ctex:latest
+
+# 指定路径
+WORKDIR /gpt
+
+RUN pip3 install gradio openai numpy arxiv rich
+RUN pip3 install colorama Markdown pygments pymupdf
+
+# 装载项目文件
+COPY . .
+
+
+# 安装依赖
+RUN pip3 install -r requirements.txt
+
+# 可选步骤,用于预热模块
+RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'
+
+# 启动
+CMD ["python3", "-u", "main.py"]
From 22a65cd1637e0d690c7db0326ddb2f5f312c0764 Mon Sep 17 00:00:00 2001
From: binary-husky <96192199+binary-husky@users.noreply.github.com>
Date: Wed, 21 Jun 2023 00:55:24 +1000
Subject: [PATCH 21/46] Create build-with-latex.yml
---
.github/workflows/build-with-latex.yml | 44 ++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100644 .github/workflows/build-with-latex.yml
diff --git a/.github/workflows/build-with-latex.yml b/.github/workflows/build-with-latex.yml
new file mode 100644
index 0000000..fb16d2c
--- /dev/null
+++ b/.github/workflows/build-with-latex.yml
@@ -0,0 +1,44 @@
+# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
+name: Create and publish a Docker image for Latex support
+
+on:
+ push:
+ branches:
+ - 'master'
+
+env:
+ REGISTRY: ghcr.io
+ IMAGE_NAME: ${{ github.repository }}_with_latex
+
+jobs:
+ build-and-push-image:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ packages: write
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v3
+
+ - name: Log in to the Container registry
+ uses: docker/login-action@v2
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Extract metadata (tags, labels) for Docker
+ id: meta
+ uses: docker/metadata-action@v4
+ with:
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+ - name: Build and push Docker image
+ uses: docker/build-push-action@v4
+ with:
+ context: .
+ push: true
+ file: docs/GithubAction+NoLocal+Latex
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
From 1fede6df7fc182a355fac65fc4487e1b579d7be7 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Tue, 20 Jun 2023 23:05:17 +0800
Subject: [PATCH 22/46] temp
---
crazy_functional.py | 5 +++--
crazy_functions/Latex输出PDF结果.py | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/crazy_functional.py b/crazy_functional.py
index d8ca9ae..abd44d7 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -119,6 +119,7 @@ def get_crazy_functions():
},
"[插件demo] 历史上的今天": {
# HotReload 的意思是热更新,修改函数插件代码后,不需要重启程序,代码直接生效
+ "AsButton": False, # 加入下拉菜单中
"Function": HotReload(高阶功能模板函数)
},
@@ -358,9 +359,9 @@ def get_crazy_functions():
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
- "[功能尚不稳定] Latex翻译/Arixv翻译+重构PDF": {
+ "Arixv论文精细翻译": {
"Color": "stop",
- "AsButton": False,
+ "AsButton": True,
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(Latex翻译中文并重新编译PDF)
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 2e9a30b..1d5e103 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -89,7 +89,7 @@ def arxiv_download(chatbot, history, txt):
except ValueError:
return False
if ('.' in txt) and ('/' not in txt) and is_float(txt):
- txt = 'https://arxiv.org/abs/' + txt
+ txt = 'https://arxiv.org/abs/' + txt.strip()
if not txt.startswith('https://arxiv.org'):
return txt, None
From cf5f348d704cfadaeb7c86bdf43bfdc219f68a47 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:20:31 +1000
Subject: [PATCH 23/46] update test samples
---
crazy_functions/crazy_functions_test.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index e743878..b4ff5e2 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -189,6 +189,7 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2211.16068" # ACE
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
txt = r"https://arxiv.org/abs/2002.09253"
+ txt = r"https://arxiv.org/abs/2306.07831"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@@ -217,6 +218,7 @@ def test_Latex():
# test_数学动画生成manim()
# test_Langchain知识库()
# test_Langchain知识库读取()
-test_Latex()
-input("程序完成,回车退出。")
-print("退出。")
\ No newline at end of file
+if __name__ == "__main__":
+ test_Latex()
+ input("程序完成,回车退出。")
+ print("退出。")
\ No newline at end of file
From d87f1eb17133a31707152f84d37cf6e9d2e4e5dc Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:38:59 +1000
Subject: [PATCH 24/46] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=8E=A5=E5=85=A5azure?=
=?UTF-8?q?=E7=9A=84=E8=AF=B4=E6=98=8E?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 11 ++--
config.py | 13 +++--
docs/use_azure.md | 143 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 157 insertions(+), 10 deletions(-)
create mode 100644 docs/use_azure.md
diff --git a/README.md b/README.md
index 7976076..7760260 100644
--- a/README.md
+++ b/README.md
@@ -186,16 +186,19 @@ docker-compose up
2. 使用docker-compose运行。
请阅读docker-compose.yml后,按照其中的提示操作即可
-3. 如何使用反代URL/微软云AzureAPI。
+3. 如何使用反代URL
按照`config.py`中的说明配置API_URL_REDIRECT即可。
-4. 远程云服务器部署(需要云服务器知识与经验)。
+4. 微软云AzureAPI
+按照`config.py`中的说明配置即可(AZURE_ENDPOINT等四个配置)
+
+5. 远程云服务器部署(需要云服务器知识与经验)。
请访问[部署wiki-1](https://github.com/binary-husky/gpt_academic/wiki/%E4%BA%91%E6%9C%8D%E5%8A%A1%E5%99%A8%E8%BF%9C%E7%A8%8B%E9%83%A8%E7%BD%B2%E6%8C%87%E5%8D%97)
-5. 使用WSL2(Windows Subsystem for Linux 子系统)。
+6. 使用WSL2(Windows Subsystem for Linux 子系统)。
请访问[部署wiki-2](https://github.com/binary-husky/gpt_academic/wiki/%E4%BD%BF%E7%94%A8WSL2%EF%BC%88Windows-Subsystem-for-Linux-%E5%AD%90%E7%B3%BB%E7%BB%9F%EF%BC%89%E9%83%A8%E7%BD%B2)
-6. 如何在二级网址(如`http://localhost/subpath`)下运行。
+7. 如何在二级网址(如`http://localhost/subpath`)下运行。
请访问[FastAPI运行说明](docs/WithFastapi.md)
---
diff --git a/config.py b/config.py
index cb26cbb..b173862 100644
--- a/config.py
+++ b/config.py
@@ -1,12 +1,6 @@
# [step 1]>> 例如: API_KEY = "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" (此key无效)
API_KEY = "sk-此处填API密钥" # 可同时填写多个API-KEY,用英文逗号分割,例如API_KEY = "sk-openaikey1,sk-openaikey2,fkxxxx-api2dkey1,fkxxxx-api2dkey2"
-#增加关于AZURE的配置信息, 可以在AZURE网页中找到
-AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
-AZURE_API_KEY = "填入azure openai api的密钥"
-AZURE_API_VERSION = "填入api版本"
-AZURE_ENGINE = "填入ENGINE"
-
# [step 2]>> 改为True应用代理,如果直接在海外服务器部署,此处不修改
USE_PROXY = False
@@ -88,3 +82,10 @@ your bing cookies here
# 如果需要使用Slack Claude,使用教程详情见 request_llm/README.md
SLACK_CLAUDE_BOT_ID = ''
SLACK_CLAUDE_USER_TOKEN = ''
+
+
+# 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
+AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "填入api版本"
+AZURE_ENGINE = "填入ENGINE"
diff --git a/docs/use_azure.md b/docs/use_azure.md
new file mode 100644
index 0000000..626b132
--- /dev/null
+++ b/docs/use_azure.md
@@ -0,0 +1,143 @@
+# 通过微软Azure云服务申请 Openai API
+
+由于Openai和微软的关系,现在是可以通过微软的Azure云计算服务直接访问openai的api,免去了注册和网络的问题。
+
+快速入门的官方文档的链接是:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+# 申请API
+
+按文档中的“先决条件”的介绍,除了编程的环境以外,还需要以下三个条件:
+
+1. Azure账号并创建订阅
+
+2. 为订阅添加Azure OpenAI 服务
+
+3. 部署模型
+
+## Azure账号并创建订阅
+
+### Azure账号
+
+创建Azure的账号时最好是有微软的账号,这样似乎更容易获得免费额度(第一个月的200美元,实测了一下,如果用一个刚注册的微软账号登录Azure的话,并没有这一个月的免费额度)。
+
+创建Azure账号的网址是:[立即创建 Azure 免费帐户 | Microsoft Azure](https://azure.microsoft.com/zh-cn/free/)
+
+
+
+打开网页后,点击 “免费开始使用” 会跳转到登录或注册页面,如果有微软的账户,直接登录即可,如果没有微软账户,那就需要到微软的网页再另行注册一个。
+
+注意,Azure的页面和政策时不时会变化,以实际最新显示的为准就好。
+
+### 创建订阅
+
+注册好Azure后便可进入主页:
+
+
+
+首先需要在订阅里进行添加操作,点开后即可进入订阅的页面:
+
+
+
+第一次进来应该是空的,点添加即可创建新的订阅(可以是“免费”或者“即付即用”的订阅),其中订阅ID是后面申请Azure OpenAI需要使用的。
+
+## 为订阅添加Azure OpenAI服务
+
+之后回到首页,点Azure OpenAI即可进入OpenAI服务的页面(如果不显示的话,则在首页上方的搜索栏里搜索“openai”即可)。
+
+
+
+不过现在这个服务还不能用。在使用前,还需要在这个网址申请一下:
+
+[Request Access to Azure OpenAI Service (microsoft.com)](https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu)
+
+这里有二十来个问题,按照要求和自己的实际情况填写即可。
+
+其中需要注意的是
+
+1. 千万记得填对"订阅ID"
+
+2. 需要填一个公司邮箱(可以不是注册用的邮箱)和公司网址
+
+之后,再回到上面那个页面,点创建,就会进入创建页面了:
+
+
+
+需要填入“资源组”和“名称”,按照自己的需要填入即可。
+
+完成后,在主页的“资源”里就可以看到刚才创建的“资源”了,点击进入后,就可以进行最后的部署了。
+
+
+
+## 部署模型
+
+进入资源页面后,在部署模型前,可以先点击“开发”,把密钥和终结点记下来。
+
+
+
+之后,就可以去部署模型了,点击“部署”即可,会跳转到 Azure OpenAI Studio 进行下面的操作:
+
+
+
+进入 Azure OpenAI Studio 后,点击新建部署,会弹出如下对话框:
+
+
+
+在这里选 gpt-35-turbo 或需要的模型并按需要填入“部署名”即可完成模型的部署。
+
+
+
+这个部署名需要记下来。
+
+到现在为止,申请操作就完成了,需要记下来的有下面几个东西:
+
+● 密钥(1或2都可以)
+
+● 终结点
+
+● 部署名(不是模型名)
+
+# API的使用
+
+接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
+
+和openai自己的api调用有点类似,都需要安装openai库,不同的是调用方式
+
+```
+import openai
+openai.api_type = "azure" #固定格式,无需修改
+openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点”
+openai.api_version = "2023-05-15" #固定格式,无需修改
+openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2”
+
+response = openai.ChatCompletion.create(
+ engine="gpt-35-turbo", #这里填入的不是模型名,是部署名
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
+ {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
+ {"role": "user", "content": "Do other Azure Cognitive Services support this too?"}
+ ]
+)
+
+print(response)
+print(response['choices'][0]['message']['content'])
+
+```
+
+需要注意的是:
+
+1. engine那里填入的是部署名,不是模型名
+
+2. 通过openai库获得的这个 response 和通过 requests 库访问 url 获得的 response 不同,不需要 decode,已经是解析好的 json 了,直接根据键值读取即可。
+
+更细节的使用方法,详见官方API文档。
+
+# 关于费用
+
+Azure OpenAI API 还是需要一些费用的(免费订阅只有1个月有效期),费用如下:
+
+
+
+具体可以看这个网址:[Azure OpenAI 服务 - 定价 | Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable)
+
+并非网上说的什么“一年白嫖”,但注册方法以及网络问题都比直接使用openai的api要简单一些。
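For reference, a sketch of how the four `AZURE_*` values added to `config.py` in this patch line up with the `openai` library settings shown in the document above (the string values are placeholders; the project performs this wiring internally):

```python
import openai

AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"  # 终结点
AZURE_API_KEY = "填入azure openai api的密钥"              # 密钥1或密钥2均可
AZURE_API_VERSION = "2023-05-15"
AZURE_ENGINE = "填入部署名"                                # 部署名,不是模型名

openai.api_type = "azure"
openai.api_base = AZURE_ENDPOINT
openai.api_version = AZURE_API_VERSION
openai.api_key = AZURE_API_KEY

response = openai.ChatCompletion.create(
    engine=AZURE_ENGINE,  # 部署名,不是模型名
    messages=[{"role": "user", "content": "你好"}],
)
print(response['choices'][0]['message']['content'])
```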
From cd389499035e2e2684063da6c9b8c5b24002fdfb Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Wed, 21 Jun 2023 11:53:57 +1000
Subject: [PATCH 25/46] =?UTF-8?q?=E5=BD=93=E9=81=87=E5=88=B0=E9=94=99?=
=?UTF-8?q?=E8=AF=AF=E6=97=B6=EF=BC=8C=E5=9B=9E=E6=BB=9A=E5=88=B0=E5=8E=9F?=
=?UTF-8?q?=E6=96=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/latex_utils.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a1e7758..48df10b 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -212,6 +212,8 @@ def fix_content(final_tex, node_string):
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
+ if "Traceback" in final_tex and "[Local Message]" in final_tex:
+ final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
From 74941170aaec1838fbc0e99963588458addcc9b8 Mon Sep 17 00:00:00 2001
From: Ranhuiryan
Date: Wed, 21 Jun 2023 16:19:26 +0800
Subject: [PATCH 26/46] update azure use instruction
---
docs/use_azure.md | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/docs/use_azure.md b/docs/use_azure.md
index 626b132..f1c27ef 100644
--- a/docs/use_azure.md
+++ b/docs/use_azure.md
@@ -96,6 +96,15 @@
● 部署名(不是模型名)
+# 修改 config.py
+
+```
+AZURE_ENDPOINT = "填入终结点"
+AZURE_API_KEY = "填入azure openai api的密钥"
+AZURE_API_VERSION = "2023-05-15" # 默认使用 2023-05-15 版本,无需修改
+AZURE_ENGINE = "填入部署名"
+
+```
# API的使用
接下来就是具体怎么使用API了,还是可以参考官方文档:[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
From 33d2e75aac8063f9d8fe615599fccf948b48424e Mon Sep 17 00:00:00 2001
From: Ranhuiryan
Date: Wed, 21 Jun 2023 16:19:49 +0800
Subject: [PATCH 27/46] add azure-gpt35 to model list
---
config.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/config.py b/config.py
index b173862..557b4e9 100644
--- a/config.py
+++ b/config.py
@@ -47,7 +47,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 同时它必须被包含在AVAIL_LLM_MODELS切换列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "newbing-free", "stack-claude"]
# P.S. 其他可用的模型还包括 ["newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# 本地LLM模型如ChatGLM的执行方式 CPU/GPU
From d841d13b047207fc15e277601ab1140e33988a9e Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:12:44 +0800
Subject: [PATCH 28/46] add arxiv translation test samples
---
crazy_functions/crazy_functions_test.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index b4ff5e2..6e17fb3 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -188,8 +188,13 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2305.17608"
# txt = r"https://arxiv.org/abs/2211.16068" # ACE
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
- txt = r"https://arxiv.org/abs/2002.09253"
- txt = r"https://arxiv.org/abs/2306.07831"
+ # txt = r"https://arxiv.org/abs/2002.09253"
+ # txt = r"https://arxiv.org/abs/2306.07831"
+ # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
+ # txt = r"https://arxiv.org/abs/2212.10156"
+ txt = r"https://arxiv.org/abs/2211.11559"
+
+
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
From b8560b75101437f7ab13e478c63d6a412d815790 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 22:46:16 +0800
Subject: [PATCH 29/46] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E8=AF=AF=E5=88=A4latex?=
=?UTF-8?q?=E6=A8=A1=E6=9D=BF=E6=96=87=E4=BB=B6=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/crazy_functions_test.py | 1 -
crazy_functions/latex_utils.py | 28 ++++++++++++++++++++++---
2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 6e17fb3..7edd04f 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,7 +190,6 @@ def test_Latex():
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
# txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831"
- # txt = r"C:\Users\fuqingxu\Desktop\2023-06-18-13-56-57-result"
# txt = r"https://arxiv.org/abs/2212.10156"
txt = r"https://arxiv.org/abs/2211.11559"
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 48df10b..def4be2 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -110,19 +110,41 @@ Latex Merge File
def 寻找Latex主文件(file_manifest, mode):
"""
在多Tex文档中,寻找主文件,必须包含documentclass,返回找到的第一个。
- P.S. 但愿没人把latex模板放在里面传进来
+ P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
"""
+ candidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
if r'\documentclass' in file_content:
- return texf
+ candidates.append(texf)
else:
continue
- raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
+ if len(candidates) == 0:
+ raise RuntimeError('无法找到一个主Tex文件(包含documentclass关键字)')
+ elif len(candidates) == 1:
+ return candidates[0]
+ else: # if len(candidates) >= 2 通过一些Latex模板中常见(但通常不会出现在正文)的单词,对不同latex源文件扣分,取评分最高者返回
+ candidates_score = []
+ # 给出一些判定模板文档的词作为扣分项
+ unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
+ expected_words = ['\input', '\ref', '\cite']
+ for texf in candidates:
+ candidates_score.append(0)
+ with open(texf, 'r', encoding='utf8') as f:
+ file_content = f.read()
+ for uw in unexpected_words:
+ if uw in file_content:
+ candidates_score[-1] -= 1
+ for uw in expected_words:
+ if uw in file_content:
+ candidates_score[-1] += 1
+ select = np.argmax(candidates_score) # 取评分最高者返回
+ return candidates[select]
+
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
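
To illustrate the scoring heuristic added to 寻找Latex主文件 above, here is a standalone sketch with made-up file contents (the word lists are the ones from the patch; the two "files" are toy strings): template-like keywords subtract a point, body-like commands add one, and the highest-scoring candidate is taken as the main file.

```python
import numpy as np

unexpected_words = ['\\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\\input', '\\ref', '\\cite']

# Toy stand-ins for two .tex files that both contain \documentclass
candidates = {
    "template.tex": r"\documentclass{article} Guidelines: font, citations, blind review ...",
    "main.tex":     r"\documentclass{article} \input{intro} see \ref{fig:1} and \cite{smith2020} ...",
}

scores = []
for name, content in candidates.items():
    score = 0
    for w in unexpected_words:
        if w in content: score -= 1   # template-ish vocabulary: penalize
    for w in expected_words:
        if w in content: score += 1   # commands typical of a real paper body: reward
    scores.append(score)

best = list(candidates)[int(np.argmax(scores))]
print(best)   # -> main.tex
```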
From 9f0cf9fb2b3546e13a94f6cb9d6e0fa44eaffad9 Mon Sep 17 00:00:00 2001
From: 505030475 <505030475@qq.com>
Date: Sun, 25 Jun 2023 23:30:31 +0800
Subject: [PATCH 30/46] =?UTF-8?q?arxiv=20PDF=20=E5=BC=95=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/crazy_functions_test.py | 3 ++-
crazy_functions/latex_utils.py | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 7edd04f..3ef555d 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -191,7 +191,8 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831"
# txt = r"https://arxiv.org/abs/2212.10156"
- txt = r"https://arxiv.org/abs/2211.11559"
+ # txt = r"https://arxiv.org/abs/2211.11559"
+ txt = r"https://arxiv.org/abs/2303.08774"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index def4be2..3e4f37c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -314,6 +314,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = split_worker(text, mask, r"\\begin\{align\}(.*?)\\end\{align\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{equation\}(.*?)\\end\{equation\}", re.DOTALL)
text, mask = split_worker(text, mask, r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}", re.DOTALL)
+ text, mask = split_worker(text, mask, r"\\includepdf\[(.*?)\]\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\item ")
text, mask = split_worker(text, mask, r"\\label\{(.*?)\}")
text, mask = split_worker(text, mask, r"\\begin\{(.*?)\}")
From 280e14d7b7794a2e94193d553e8bd271dd0fd3f7 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Mon, 26 Jun 2023 09:59:14 +0800
Subject: [PATCH 31/46] =?UTF-8?q?=E6=9B=B4=E6=96=B0Latex=E6=A8=A1=E5=9D=97?=
=?UTF-8?q?=E7=9A=84docker-compose?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 19 ++++++-------------
docker-compose.yml | 27 +++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index 7760260..b8b76c9 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@ cd gpt_academic
2. 配置API_KEY
-在`config.py`中,配置API KEY等设置,[特殊网络环境设置](https://github.com/binary-husky/gpt_academic/issues/1) 。
+在`config.py`中,配置API KEY等设置,[点击查看特殊网络环境设置方法](https://github.com/binary-husky/gpt_academic/issues/1) 。
(P.S. 程序运行时会优先检查是否存在名为`config_private.py`的私密配置文件,并用其中的配置覆盖`config.py`的同名配置。因此,如果您能理解我们的配置读取逻辑,我们强烈建议您在`config.py`旁边创建一个名为`config_private.py`的新配置文件,并把`config.py`中的配置转移(复制)到`config_private.py`中。`config_private.py`不受git管控,可以让您的隐私信息更加安全。P.S.项目同样支持通过`环境变量`配置大多数选项,环境变量的书写格式参考`docker-compose`文件。读取优先级: `环境变量` > `config_private.py` > `config.py`)
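
For illustration, a minimal `config_private.py` could look like the sketch below; the keys mirror options that appear in `config.py` and the docker-compose file elsewhere in these patches, and every value is a placeholder:

```python
# config_private.py -- sits next to config.py, is not tracked by git,
# and any option defined here overrides the option of the same name in config.py.
API_KEY = "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"   # placeholder
USE_PROXY = True
proxies = {
    "http":  "socks5h://localhost:10880",
    "https": "socks5h://localhost:10880",
}
LLM_MODEL = "gpt-3.5-turbo"
WEB_PORT = 12303
```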
@@ -140,15 +140,9 @@ AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-
python main.py
```
-5. 测试函数插件
-```
-- 测试函数插件模板函数(要求gpt回答历史上的今天发生了什么),您可以根据此函数为模板,实现更复杂的功能
- 点击 "[函数插件模板Demo] 历史上的今天"
-```
-
## 安装-方法2:使用Docker
-1. 仅ChatGPT(推荐大多数人选择)
+1. 仅ChatGPT(推荐大多数人选择,等价于docker-compose方案1)
``` sh
git clone https://github.com/binary-husky/gpt_academic.git # 下载项目
@@ -161,26 +155,25 @@ docker run --rm -it --net=host gpt-academic
#(最后一步-选择2)在macOS/windows环境下,只能用-p选项将容器上的端口(例如50923)暴露给主机上的端口
docker run --rm -it -e WEB_PORT=50923 -p 50923:50923 gpt-academic
```
-P.S. 如果需要依赖Latex的插件功能,请见Wiki
+P.S. 如果需要依赖Latex的插件功能,请见Wiki。另外,您也可以直接使用docker-compose获取Latex功能(修改docker-compose.yml,保留方案4并删除其他方案)。
2. ChatGPT + ChatGLM + MOSS(需要熟悉Docker)
``` sh
-# 修改docker-compose.yml,删除方案1和方案3,保留方案2。修改docker-compose.yml中方案2的配置,参考其中注释即可
+# 修改docker-compose.yml,保留方案2并删除其他方案。修改docker-compose.yml中方案2的配置,参考其中注释即可
docker-compose up
```
3. ChatGPT + LLAMA + 盘古 + RWKV(需要熟悉Docker)
``` sh
-# 修改docker-compose.yml,删除方案1和方案2,保留方案3。修改docker-compose.yml中方案3的配置,参考其中注释即可
+# 修改docker-compose.yml,保留方案3并删除其他方案。修改docker-compose.yml中方案3的配置,参考其中注释即可
docker-compose up
```
## 安装-方法3:其他部署姿势
1. 一键运行脚本。
-完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本,
-不建议电脑上已有python的用户采用此方法(在此基础上安装插件的依赖很麻烦)。
+完全不熟悉python环境的Windows用户可以下载[Release](https://github.com/binary-husky/gpt_academic/releases)中发布的一键运行脚本安装无本地模型的版本。
脚本的贡献来源是[oobabooga](https://github.com/oobabooga/one-click-installers)。
2. 使用docker-compose运行。
diff --git a/docker-compose.yml b/docker-compose.yml
index 07f1c9f..0a0dcda 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -103,3 +103,30 @@ services:
echo '[jittorllms] 正在从github拉取最新代码...' &&
git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force &&
python3 -u main.py"
+
+
+## ===================================================
+## 【方案四】 chatgpt + Latex
+## ===================================================
+version: '3'
+services:
+ gpt_academic_with_latex:
+ image: ghcr.io/binary-husky/gpt_academic_with_latex:master
+ environment:
+ # 请查阅 `config.py` 以查看所有的配置信息
+ API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
+ USE_PROXY: ' True '
+ proxies: ' { "http": "socks5h://localhost:10880", "https": "socks5h://localhost:10880", } '
+ LLM_MODEL: ' gpt-3.5-turbo '
+ AVAIL_LLM_MODELS: ' ["gpt-3.5-turbo", "gpt-4"] '
+ LOCAL_MODEL_DEVICE: ' cuda '
+ DEFAULT_WORKER_NUM: ' 10 '
+ WEB_PORT: ' 12303 '
+
+ # 与宿主的网络融合
+ network_mode: "host"
+
+ # 不使用代理网络拉取最新代码
+ command: >
+ bash -c "python3 -u main.py"
+
From 4290821a504ec2996241c09b262653111c7208b8 Mon Sep 17 00:00:00 2001
From: Xminry <46775500+Xminry@users.noreply.github.com>
Date: Tue, 27 Jun 2023 01:57:31 +0800
Subject: [PATCH 32/46] =?UTF-8?q?Update=20=E7=90=86=E8=A7=A3PDF=E6=96=87?=
=?UTF-8?q?=E6=A1=A3=E5=86=85=E5=AE=B9.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/理解PDF文档内容.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/crazy_functions/理解PDF文档内容.py b/crazy_functions/理解PDF文档内容.py
index 5050864..f1a89a7 100644
--- a/crazy_functions/理解PDF文档内容.py
+++ b/crazy_functions/理解PDF文档内容.py
@@ -13,7 +13,9 @@ def 解析PDF(file_name, llm_kwargs, plugin_kwargs, chatbot, history, system_pro
# 递归地切割PDF文件,每一块(尽量是完整的一个section,比如introduction,experiment等,必要时再进行切割)
# 的长度必须小于 2500 个 Token
file_content, page_one = read_and_clean_pdf_text(file_name) # (尝试)按照章节切割PDF
-
+ file_content = file_content.encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
+ page_one = str(page_one).encode('utf-8', 'ignore').decode() # avoid reading non-utf8 chars
+
TOKEN_LIMIT_PER_FRAGMENT = 2500
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
From f654c1af317ab6fccb40b0097800690a786d8d5d Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Tue, 27 Jun 2023 18:59:56 +0800
Subject: [PATCH 33/46] merge regex expressions
---
crazy_functions/crazy_functions_test.py | 6 +-
crazy_functions/latex_utils.py | 74 ++++++++++++-------------
2 files changed, 39 insertions(+), 41 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 3ef555d..f2d3969 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,9 +190,11 @@ def test_Latex():
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
# txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831"
- # txt = r"https://arxiv.org/abs/2212.10156"
+ txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559"
- txt = r"https://arxiv.org/abs/2303.08774"
+ # txt = r"https://arxiv.org/abs/2303.08774"
+ # txt = r"https://arxiv.org/abs/2303.12712"
+ # txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a7eb9f2..83c4401 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -21,6 +21,7 @@ def set_forbidden_text(text, mask, pattern, flags=0):
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
+ if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
@@ -46,7 +47,7 @@ def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
mask[begin:end] = PRESERVE
return text, mask
-def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
+def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch the complete text area.
@@ -64,6 +65,9 @@ def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0):
p += 1
end = p
mask[begin:end] = TRANSFORM
+ if forbid_wrapper:
+ mask[res.regs[0][0]:begin] = PRESERVE
+ mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
@@ -163,6 +167,7 @@ def rm_comments(main_file):
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
+ # main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file)
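
A quick standalone sketch (toy input, not part of the patch) of what the new `'|'.join(pattern)` line in `set_forbidden_text` amounts to: a list of regexes collapses into a single alternation, so one pass can mask several constructs at once.

```python
import re

patterns = [r"\$\$(.*?)\$\$", r"\\\[.*?\\\]"]   # the same two patterns the caller passes in
merged = '|'.join(patterns) if isinstance(patterns, list) else patterns

text = r"intro $$a^2+b^2=c^2$$ middle \[E=mc^2\] outro"
for m in re.finditer(merged, text, re.DOTALL):
    print(m.span(), m.group(0))
# one compiled pattern now catches both the $$...$$ block and the \[...\] block
```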
Date: Tue, 27 Jun 2023 19:16:05 +0800
Subject: [PATCH 34/46] add `item` breaker
---
crazy_functions/latex_utils.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 83c4401..49f547c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -302,7 +302,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
# 吸收iffalse注释
text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
- # 吸收在25行以内的begin-end组合
+ # 吸收在42行以内的begin-end组合
text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
# 吸收匿名公式
text, mask = set_forbidden_text(text, mask, [ r"\$\$(.*?)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
@@ -321,7 +321,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
- text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}"])
+ text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
# reverse 操作必须放在最后
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
From 99cf7205c3059caaae0fa46f1739d602a95e1bf5 Mon Sep 17 00:00:00 2001
From: Xminry
Date: Wed, 28 Jun 2023 10:30:08 +0800
Subject: [PATCH 35/46] =?UTF-8?q?feat:=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2?=
=?UTF-8?q?=E5=8A=9F=E8=83=BD=EF=BC=8Ccn.bing.com=E7=89=88=EF=BC=8C?=
=?UTF-8?q?=E5=9B=BD=E5=86=85=E5=8F=AF=E7=94=A8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functional.py | 12 +++
crazy_functions/联网的ChatGPT_bing版.py | 102 ++++++++++++++++++++++++
2 files changed, 114 insertions(+)
create mode 100644 crazy_functions/联网的ChatGPT_bing版.py
diff --git a/crazy_functional.py b/crazy_functional.py
index a724b97..aea97a6 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -235,6 +235,18 @@ def get_crazy_functions():
except:
print('Load function plugin failed')
+ try:
+ from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
+ function_plugins.update({
+ "连接网络回答问题_bing搜索(先输入问题,再点击按钮,搜索引擎为cn.bing.com,国内可用)": {
+ "Color": "stop",
+ "AsButton": False, # 加入下拉菜单中
+ "Function": HotReload(连接bing搜索回答问题)
+ }
+ })
+ except:
+ print('Load function plugin failed')
+
try:
from crazy_functions.解析项目源代码 import 解析任意code项目
function_plugins.update({
diff --git a/crazy_functions/联网的ChatGPT_bing版.py b/crazy_functions/联网的ChatGPT_bing版.py
new file mode 100644
index 0000000..93a84a0
--- /dev/null
+++ b/crazy_functions/联网的ChatGPT_bing版.py
@@ -0,0 +1,102 @@
+from toolbox import CatchException, update_ui
+from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
+import requests
+from bs4 import BeautifulSoup
+from request_llm.bridge_all import model_info
+
+
+def bing_search(query, proxies=None):
+ query = query
+ url = f"https://cn.bing.com/search?q={query}"
+ headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
+ response = requests.get(url, headers=headers, proxies=proxies)
+ soup = BeautifulSoup(response.content, 'html.parser')
+ results = []
+ for g in soup.find_all('li', class_='b_algo'):
+ anchors = g.find_all('a')
+ if anchors:
+ link = anchors[0]['href']
+ if not link.startswith('http'):
+ continue
+ title = g.find('h2').text
+ item = {'title': title, 'link': link}
+ results.append(item)
+
+ for r in results:
+ print(r['link'])
+ return results
+
+
+def scrape_text(url, proxies) -> str:
+ """Scrape text from a webpage
+
+ Args:
+ url (str): The URL to scrape text from
+
+ Returns:
+ str: The scraped text
+ """
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
+ 'Content-Type': 'text/plain',
+ }
+ try:
+ response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
+ if response.encoding == "ISO-8859-1": response.encoding = response.apparent_encoding
+ except:
+ return "无法连接到该网页"
+ soup = BeautifulSoup(response.text, "html.parser")
+ for script in soup(["script", "style"]):
+ script.extract()
+ text = soup.get_text()
+ lines = (line.strip() for line in text.splitlines())
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+ text = "\n".join(chunk for chunk in chunks if chunk)
+ return text
+
+@CatchException
+def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+ """
+ txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
+ llm_kwargs gpt模型参数,如温度和top_p等,一般原样传递下去就行
+ plugin_kwargs 插件模型的参数,暂时没有用武之地
+ chatbot 聊天显示框的句柄,用于显示给用户
+ history 聊天历史,前情提要
+ system_prompt 给gpt的静默提醒
+ web_port 当前软件运行的端口号
+ """
+ history = [] # 清空历史,以免输入溢出
+ chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
+ "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+ # ------------- < 第1步:爬取搜索引擎的结果 > -------------
+ from toolbox import get_conf
+ proxies, = get_conf('proxies')
+ urls = bing_search(txt, proxies)
+ history = []
+
+ # ------------- < 第2步:依次访问网页 > -------------
+ max_search_result = 8 # 最多收纳多少个网页的结果
+ for index, url in enumerate(urls[:max_search_result]):
+ res = scrape_text(url['link'], proxies)
+ history.extend([f"第{index}份搜索结果:", res])
+ chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新
+
+ # ------------- < 第3步:ChatGPT综合 > -------------
+ i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
+ i_say, history = input_clipping( # 裁剪输入,从最长的条目开始裁剪,防止爆token
+ inputs=i_say,
+ history=history,
+ max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
+ )
+ gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
+ inputs=i_say, inputs_show_user=i_say,
+ llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
+ sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
+ )
+ chatbot[-1] = (i_say, gpt_say)
+ history.append(i_say);history.append(gpt_say)
+ yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+
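
The two helpers above can be exercised on their own; a rough sketch follows (it requires network access, the import path simply follows the new file added by this patch, and the CSS classes Bing serves may change over time, so treat the output as best-effort):

```python
# Assumes the repository root is on sys.path; names follow the patch above.
from crazy_functions.联网的ChatGPT_bing版 import bing_search, scrape_text

results = bing_search("gpt academic github")        # list of {'title': ..., 'link': ...}
for r in results[:3]:
    print(r['title'], '->', r['link'])

page_text = scrape_text(results[0]['link'], proxies=None)
print(page_text[:200])
```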
From eb4c07997ece2efe35fce63b8bb7c36b6179342a Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 11:30:42 +0800
Subject: [PATCH 36/46] =?UTF-8?q?=E4=BF=AE=E5=A4=8DLatex=E7=9F=AB=E9=94=99?=
=?UTF-8?q?=E5=92=8C=E6=9C=AC=E5=9C=B0Latex=E8=AE=BA=E6=96=87=E7=BF=BB?=
=?UTF-8?q?=E8=AF=91=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functional.py | 30 ++++++++++++-------------
crazy_functions/Latex输出PDF结果.py | 24 +++++++++++++++-----
crazy_functions/crazy_functions_test.py | 4 ++--
crazy_functions/latex_utils.py | 10 +++++----
4 files changed, 41 insertions(+), 27 deletions(-)
diff --git a/crazy_functional.py b/crazy_functional.py
index a724b97..7f8c41e 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -348,17 +348,28 @@ def get_crazy_functions():
try:
from crazy_functions.Latex输出PDF结果 import Latex英文纠错加PDF对比
function_plugins.update({
- "[功能尚不稳定] Latex英文纠错+LatexDiff高亮修正位置": {
+ "Latex英文纠错+高亮修正位置 [需Latex]": {
"Color": "stop",
"AsButton": False,
- # "AdvancedArgs": True,
- # "ArgsReminder": "",
+ "AdvancedArgs": True,
+ "ArgsReminder": "如果有必要, 请在此处追加更细致的矫错指令(使用英文)。",
"Function": HotReload(Latex英文纠错加PDF对比)
}
})
from crazy_functions.Latex输出PDF结果 import Latex翻译中文并重新编译PDF
function_plugins.update({
- "Arixv翻译(输入arxivID) [需Latex]": {
+ "Arixv翻译(输入arxivID)[需Latex]": {
+ "Color": "stop",
+ "AsButton": False,
+ "AdvancedArgs": True,
+ "ArgsReminder":
+ "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
+ "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
+ "Function": HotReload(Latex翻译中文并重新编译PDF)
+ }
+ })
+ function_plugins.update({
+ "本地论文翻译(上传Latex压缩包)[需Latex]": {
"Color": "stop",
"AsButton": False,
"AdvancedArgs": True,
@@ -368,17 +379,6 @@ def get_crazy_functions():
"Function": HotReload(Latex翻译中文并重新编译PDF)
}
})
- # function_plugins.update({
- # "本地论文翻译(上传Latex压缩包) [需Latex]": {
- # "Color": "stop",
- # "AsButton": False,
- # "AdvancedArgs": True,
- # "ArgsReminder":
- # "如果有必要, 请在此处给出自定义翻译命令, 解决部分词汇翻译不准确的问题。 "+
- # "例如当单词'agent'翻译不准确时, 请尝试把以下指令复制到高级参数区: " + 'If the term "agent" is used in this section, it should be translated to "智能体". ',
- # "Function": HotReload(Latex翻译中文并重新编译PDF)
- # }
- # })
except:
print('Load function plugin failed')
diff --git a/crazy_functions/Latex输出PDF结果.py b/crazy_functions/Latex输出PDF结果.py
index 1886375..810d802 100644
--- a/crazy_functions/Latex输出PDF结果.py
+++ b/crazy_functions/Latex输出PDF结果.py
@@ -19,9 +19,9 @@ def switch_prompt(pfg, mode, more_requirement):
- sys_prompt_array: A list of strings containing prompts for system prompts.
"""
n_split = len(pfg.sp_file_contents)
- if mode == 'proofread':
+ if mode == 'proofread_en':
inputs_array = [r"Below is a section from an academic paper, proofread this section." +
- r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " +
+ r"Do not modify any latex command such as \section, \cite, \begin, \item and equations. " + more_requirement +
r"Answer me only with the revised text:" +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
@@ -70,6 +70,12 @@ def move_project(project_folder, arxiv_id=None):
shutil.rmtree(new_workfolder)
except:
pass
+
+ # align subfolder if there is a folder wrapper
+ items = glob.glob(pj(project_folder,'*'))
+ if len(glob.glob(pj(project_folder,'*.tex'))) == 0 and len(items) == 1:
+ if os.path.isdir(items[0]): project_folder = items[0]
+
shutil.copytree(src=project_folder, dst=new_workfolder)
return new_workfolder
@@ -141,7 +147,11 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
chatbot.append([ "函数插件功能?",
"对整个Latex项目进行纠错, 用latex编译为PDF对修正处做高亮。函数插件贡献者: Binary-Husky。注意事项: 目前仅支持GPT3.5/GPT4,其他模型转化效果未知。目前对机器学习类文献转化效果最好,其他类型文献转化效果未知。仅在Windows系统进行了测试,其他操作系统表现未知。"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
-
+
+ # <-------------- more requirements ------------->
+ if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
+ more_req = plugin_kwargs.get("advanced_arg", "")
+ _switch_prompt_ = partial(switch_prompt, more_requirement=more_req)
# <-------------- check deps ------------->
try:
@@ -180,13 +190,13 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
# <-------------- if merge_translate_zh is already generated, skip gpt req ------------->
- if not os.path.exists(project_folder + '/merge_proofread.tex'):
+ if not os.path.exists(project_folder + '/merge_proofread_en.tex'):
yield from Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
- chatbot, history, system_prompt, mode='proofread_latex', switch_prompt=switch_prompt)
+ chatbot, history, system_prompt, mode='proofread_en', switch_prompt=_switch_prompt_)
# <-------------- compile PDF ------------->
- success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread',
+ success = yield from 编译Latex(chatbot, history, main_file_original='merge', main_file_modified='merge_proofread_en',
work_folder_original=project_folder, work_folder_modified=project_folder, work_folder=project_folder)
@@ -195,6 +205,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
@@ -278,6 +289,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
if success:
chatbot.append((f"成功啦", '请查收结果(压缩包)...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
+ promote_file_to_downloadzone(file=zip_res, chatbot=chatbot)
else:
chatbot.append((f"失败了", '虽然PDF生成失败了, 但请查收结果(压缩包), 内含已经翻译的Tex文档, 也是可读的, 您可以到Github Issue区, 用该压缩包+对话历史存档进行反馈 ...'))
yield from update_ui(chatbot=chatbot, history=history); time.sleep(1) # 刷新界面
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index f2d3969..0c623b8 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -190,10 +190,10 @@ def test_Latex():
# txt = r"C:\Users\x\arxiv_cache\2211.16068\workfolder" # ACE
# txt = r"https://arxiv.org/abs/2002.09253"
# txt = r"https://arxiv.org/abs/2306.07831"
- txt = r"https://arxiv.org/abs/2212.10156"
+ # txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559"
# txt = r"https://arxiv.org/abs/2303.08774"
- # txt = r"https://arxiv.org/abs/2303.12712"
+ txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 49f547c..a38405c 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -430,7 +430,7 @@ class LatexPaperSplit():
"""
def __init__(self) -> None:
self.nodes = None
- self.msg = "{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
+ self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成," + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
# 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加README中的QQ联系开发者)
@@ -741,13 +741,15 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
+ if diff_pdf_success:
+ result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
+ promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(current_dir)
- result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf')
+ result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
- promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot)
+ promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
return True # 成功啦
else:
if n_fix>=max_try: break
From 64f76e7401a099cffc2e177835bdb4d30891062d Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 11:32:19 +0800
Subject: [PATCH 37/46] 3.42
---
version | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/version b/version
index ceb909a..6353b34 100644
--- a/version
+++ b/version
@@ -1,5 +1,5 @@
{
- "version": 3.41,
+ "version": 3.42,
"show_feature": true,
- "new_feature": "增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+ "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
}
From 1a0009301548d9ccbaaaa0ed33fdfb62c76465b8 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:15:52 +0800
Subject: [PATCH 38/46] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/latex_utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index a38405c..8b41fc9 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -430,7 +430,7 @@ class LatexPaperSplit():
"""
def __init__(self) -> None:
self.nodes = None
- self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+\Latex 翻译插件一键生成," + \
+ self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
"版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
"项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
# 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加README中的QQ联系开发者)
From 49253c4dc6393b68e08a0657011aad4c36fd7957 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:29:49 +0800
Subject: [PATCH 39/46] [arxiv trans] add html comparison to zip file
---
crazy_functions/latex_utils.py | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 8b41fc9..69f05ff 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -532,11 +532,11 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
-def write_html(sp_file_contents, sp_file_result, chatbot):
+def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
- import copy
+ import shutil
from .crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
@@ -554,6 +554,7 @@ def write_html(sp_file_contents, sp_file_result, chatbot):
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
+ shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
@@ -634,7 +635,7 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
pfg.get_token_num = None
objdump(pfg, file=pj(project_folder,'temp.pkl'))
- write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot)
+ write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
From aced272d3c3d4c3b3fd250b6c97c574cd95b30f8 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Thu, 29 Jun 2023 12:43:50 +0800
Subject: [PATCH 40/46] =?UTF-8?q?=E5=BE=AE=E8=B0=83=E6=8F=92=E4=BB=B6?=
=?UTF-8?q?=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functional.py | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/crazy_functional.py b/crazy_functional.py
index ec3235e..03aaaf5 100644
--- a/crazy_functional.py
+++ b/crazy_functional.py
@@ -226,19 +226,15 @@ def get_crazy_functions():
try:
from crazy_functions.联网的ChatGPT import 连接网络回答问题
function_plugins.update({
- "连接网络回答问题(先输入问题,再点击按钮,需要访问谷歌)": {
+ "连接网络回答问题(输入问题后点击该插件,需要访问谷歌)": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接网络回答问题)
}
})
- except:
- print('Load function plugin failed')
-
- try:
from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
function_plugins.update({
- "连接网络回答问题_bing搜索(先输入问题,再点击按钮,搜索引擎为cn.bing.com,国内可用)": {
+ "连接网络回答问题(中文Bing版,输入问题后点击该插件)": {
"Color": "stop",
"AsButton": False, # 加入下拉菜单中
"Function": HotReload(连接bing搜索回答问题)
From 3b78e0538b8890d7eefa8858948117be8d4da3e1 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Thu, 29 Jun 2023 14:52:58 +0800
Subject: [PATCH 41/46] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=8F=92=E4=BB=B6demo?=
=?UTF-8?q?=E7=9A=84=E5=9B=BE=E5=83=8F=E6=98=BE=E7=A4=BA=E7=9A=84=E9=97=AE?=
=?UTF-8?q?=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
crazy_functions/高级功能函数模板.py | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/crazy_functions/高级功能函数模板.py b/crazy_functions/高级功能函数模板.py
index 7c6a7ff..73ae45f 100644
--- a/crazy_functions/高级功能函数模板.py
+++ b/crazy_functions/高级功能函数模板.py
@@ -1,6 +1,7 @@
from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
-import datetime
+import datetime, re
+
@CatchException
def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
@@ -18,12 +19,34 @@ def 高阶功能模板函数(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
for i in range(5):
currentMonth = (datetime.date.today() + datetime.timedelta(days=i)).month
currentDay = (datetime.date.today() + datetime.timedelta(days=i)).day
- i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?列举两条并发送相关图片。发送图片时,请使用Markdown,将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词。'
+ i_say = f'历史中哪些事件发生在{currentMonth}月{currentDay}日?用中文列举两条,然后分别给出描述事件的两个英文单词。' + '当你给出关键词时,使用以下json格式:{"KeyWords":[EnglishKeyWord1,EnglishKeyWord2]}。'
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
- sys_prompt="当你想发送一张照片时,请使用Markdown, 并且不要有反斜线, 不要用代码块。使用 Unsplash API (https://source.unsplash.com/1280x720/? < PUT_YOUR_QUERY_HERE >)。"
+ sys_prompt='输出格式示例:1908年,美国消防救援事业发展的“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。'
)
+ gpt_say = get_images(gpt_say)
chatbot[-1] = (i_say, gpt_say)
history.append(i_say);history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
+
+
+def get_images(gpt_say):
+ def get_image_by_keyword(keyword):
+ import requests
+ from bs4 import BeautifulSoup
+ response = requests.get(f'https://wallhaven.cc/search?q={keyword}', timeout=2)
+ for image_element in BeautifulSoup(response.content, 'html.parser').findAll("img"):
+ if "data-src" in image_element: break
+ return image_element["data-src"]
+
+ for keywords in re.findall('{"KeyWords":\[(.*?)\]}', gpt_say):
+ keywords = [n.strip('"') for n in keywords.split(',')]
+ try:
+ description = keywords[0]
+ url = get_image_by_keyword(keywords[0])
+ img_tag = f"\n\n"
+ gpt_say += img_tag
+ except:
+ continue
+ return gpt_say
\ No newline at end of file
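
The keyword extraction in `get_images` relies on the model echoing a small JSON fragment in the format requested by the new prompt; a standalone sketch of that parsing step (the sample reply text is made up for illustration):

```python
import re

# A made-up model reply following the format the new prompt asks for
gpt_say = '1908年,“美国消防协会”成立。关键词:{"KeyWords":["Fire","American"]}。'

for keywords in re.findall(r'{"KeyWords":\[(.*?)\]}', gpt_say):
    keywords = [n.strip('"') for n in keywords.split(',')]
    print(keywords)   # -> ['Fire', 'American']
```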
From 37172906ef5a697d2ef3ee272147a27dd67ae138 Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Thu, 29 Jun 2023 14:55:55 +0800
Subject: [PATCH 42/46] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=87=E4=BB=B6?=
=?UTF-8?q?=E5=AF=BC=E5=87=BA=E7=9A=84bug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
toolbox.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/toolbox.py b/toolbox.py
index fb6aa9f..256d99c 100644
--- a/toolbox.py
+++ b/toolbox.py
@@ -498,7 +498,7 @@ def on_report_generated(cookies, files, chatbot):
else:
report_files = find_recent_files('gpt_log')
if len(report_files) == 0:
- return None, chatbot
+ return cookies, None, chatbot
# files.extend(report_files)
file_links = ''
for f in report_files: file_links += f'<br/>{f}'
From 22f377e2fb6bf45c2a0447c0680ee0a1eba8f6d7 Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 30 Jun 2023 11:05:47 +0800
Subject: [PATCH 43/46] fix multi user cwd shift
---
crazy_functions/crazy_functions_test.py | 3 +-
crazy_functions/latex_utils.py | 51 ++++++++++++++++---------
2 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/crazy_functions/crazy_functions_test.py b/crazy_functions/crazy_functions_test.py
index 0c623b8..60b6b87 100644
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@@ -193,8 +193,9 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2212.10156"
# txt = r"https://arxiv.org/abs/2211.11559"
# txt = r"https://arxiv.org/abs/2303.08774"
- txt = r"https://arxiv.org/abs/2303.12712"
+ # txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
+ txt = r"C:\Users\fuqingxu\Desktop\9"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
diff --git a/crazy_functions/latex_utils.py b/crazy_functions/latex_utils.py
index 69f05ff..eb65a8a 100644
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@@ -27,6 +27,24 @@ def set_forbidden_text(text, mask, pattern, flags=0):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
+def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
+ """
+ Move area out of preserve area (make text editable for GPT)
+ count the number of the braces so as to catch the complete text area.
+ e.g.
+ \begin{abstract} blablablablablabla. \end{abstract}
+ """
+ if isinstance(pattern, list): pattern = '|'.join(pattern)
+ pattern_compile = re.compile(pattern, flags)
+ for res in pattern_compile.finditer(text):
+ if not forbid_wrapper:
+ mask[res.span()[0]:res.span()[1]] = TRANSFORM
+ else:
+ mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
+ mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
+ mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # '\\end{abstract}'
+ return text, mask
+
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
@@ -326,6 +344,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
# reverse 操作必须放在最后
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
+ text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
root = convert_to_linklist(text, mask)
# 修复括号
@@ -672,10 +691,9 @@ def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
-
-def compile_latex_with_timeout(command, timeout=60):
+def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
- process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
@@ -699,24 +717,24 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
# 只有第二步成功,才能继续下面的步骤
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
- os.chdir(work_folder_original); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex'); os.chdir(current_dir)
- os.chdir(work_folder_modified); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
if mode!='translate_zh':
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
@@ -724,13 +742,11 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'bibtex merge_diff.aux'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
- os.chdir(work_folder); ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex'); os.chdir(current_dir)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+ ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
+ ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
- # <--------------------->
- os.chdir(current_dir)
# <---------- 检查结果 ----------->
results_ = ""
@@ -766,7 +782,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break
- os.chdir(current_dir)
return False # 失败啦
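
The core of this patch is replacing global `os.chdir(...)` calls with the `cwd=` argument of `subprocess.Popen`, so concurrent requests no longer fight over the process-wide working directory. A minimal sketch of the pattern follows; the helper name, command and folder are placeholders, not the project's own function:

```python
import subprocess

def run_with_timeout(command, cwd, timeout=60):
    # The working directory is scoped to the child process only;
    # the parent process never changes its own cwd, so other threads are unaffected.
    process = subprocess.Popen(command, shell=True,
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
    try:
        stdout, stderr = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        stdout, stderr = process.communicate()
    return process.returncode == 0

# e.g. compile a tex file inside its own work folder, callable from any thread
ok = run_with_timeout('pdflatex -interaction=batchmode -file-line-error merge.tex',
                      cwd='/path/to/workfolder')   # placeholder path
print(ok)
```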
From 403667aec18cba2d9fb719afa946168f3907124f Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Fri, 30 Jun 2023 12:06:28 +0800
Subject: [PATCH 44/46] upgrade chatglm to chatglm2
---
request_llm/bridge_all.py | 11 ++++++++++-
request_llm/bridge_chatglm.py | 6 +++---
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/request_llm/bridge_all.py b/request_llm/bridge_all.py
index 02cfe98..d33f161 100644
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@@ -152,7 +152,7 @@ model_info = {
"token_cnt": get_token_num_gpt4,
},
- # chatglm
+ # 将 chatglm 直接对齐到 chatglm2
"chatglm": {
"fn_with_ui": chatglm_ui,
"fn_without_ui": chatglm_noui,
@@ -161,6 +161,15 @@ model_info = {
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
},
+ "chatglm2": {
+ "fn_with_ui": chatglm_ui,
+ "fn_without_ui": chatglm_noui,
+ "endpoint": None,
+ "max_token": 1024,
+ "tokenizer": tokenizer_gpt35,
+ "token_cnt": get_token_num_gpt35,
+ },
+
# newbing
"newbing": {
"fn_with_ui": newbing_ui,
diff --git a/request_llm/bridge_chatglm.py b/request_llm/bridge_chatglm.py
index 100783d..deaacd2 100644
--- a/request_llm/bridge_chatglm.py
+++ b/request_llm/bridge_chatglm.py
@@ -40,12 +40,12 @@ class GetGLMHandle(Process):
while True:
try:
if self.chatglm_model is None:
- self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
+ self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
device, = get_conf('LOCAL_MODEL_DEVICE')
if device=='cpu':
- self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).float()
+ self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
else:
- self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda()
+ self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
self.chatglm_model = self.chatglm_model.eval()
break
else:
From ecb08e69be5b39a206b742c365379286260ecabe Mon Sep 17 00:00:00 2001
From: qingxu fu <505030475@qq.com>
Date: Fri, 30 Jun 2023 13:08:54 +0800
Subject: [PATCH 45/46] remove find picture core functionality
---
core_functional.py | 2 +-
crazy_functions/批量翻译PDF文档_多线程.py | 19 +++++--------------
2 files changed, 6 insertions(+), 15 deletions(-)
diff --git a/core_functional.py b/core_functional.py
index e126b57..7bc3582 100644
--- a/core_functional.py
+++ b/core_functional.py
@@ -63,6 +63,7 @@ def get_core_functions():
"Prefix": r"我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL," +
r"然后请使用Markdown格式封装,并且不要有反斜线,不要用代码块。现在,请按以下描述给我发送图片:" + "\n\n",
"Suffix": r"",
+ "Visible": False,
},
"解释代码": {
"Prefix": r"请解释以下代码:" + "\n```\n",
@@ -73,6 +74,5 @@ def get_core_functions():
r"Note that, reference styles maybe more than one kind, you should transform each item correctly." +
r"Items need to be transformed:",
"Suffix": r"",
- "Visible": False,
}
}
diff --git a/crazy_functions/批量翻译PDF文档_多线程.py b/crazy_functions/批量翻译PDF文档_多线程.py
index 06d8a5a..0adac96 100644
--- a/crazy_functions/批量翻译PDF文档_多线程.py
+++ b/crazy_functions/批量翻译PDF文档_多线程.py
@@ -1,5 +1,5 @@
from toolbox import CatchException, report_execption, write_results_to_file
-from toolbox import update_ui
+from toolbox import update_ui, promote_file_to_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import read_and_clean_pdf_text
@@ -147,23 +147,14 @@ def 解析PDF(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot,
print('writing html result failed:', trimmed_format_exc())
# 准备文件的下载
- import shutil
for pdf_path in generated_conclusion_files:
# 重命名文件
- rename_file = f'./gpt_log/翻译-{os.path.basename(pdf_path)}'
- if os.path.exists(rename_file):
- os.remove(rename_file)
- shutil.copyfile(pdf_path, rename_file)
- if os.path.exists(pdf_path):
- os.remove(pdf_path)
+ rename_file = f'翻译-{os.path.basename(pdf_path)}'
+ promote_file_to_downloadzone(pdf_path, rename_file=rename_file, chatbot=chatbot)
for html_path in generated_html_files:
# 重命名文件
- rename_file = f'./gpt_log/翻译-{os.path.basename(html_path)}'
- if os.path.exists(rename_file):
- os.remove(rename_file)
- shutil.copyfile(html_path, rename_file)
- if os.path.exists(html_path):
- os.remove(html_path)
+ rename_file = f'翻译-{os.path.basename(html_path)}'
+ promote_file_to_downloadzone(html_path, rename_file=rename_file, chatbot=chatbot)
chatbot.append(("给出输出文件清单", str(generated_conclusion_files + generated_html_files)))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
From df3f1aa3cac73b5906e36b446ee4cb6edd91af6a Mon Sep 17 00:00:00 2001
From: binary-husky
Date: Fri, 30 Jun 2023 14:56:22 +0800
Subject: [PATCH 46/46] =?UTF-8?q?=E6=9B=B4=E6=AD=A3ChatGLM2=E7=9A=84?=
=?UTF-8?q?=E9=BB=98=E8=AE=A4Token=E6=95=B0=E9=87=8F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
main.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/main.py b/main.py
index 65e1f4c..2144010 100644
--- a/main.py
+++ b/main.py
@@ -104,7 +104,7 @@ def main():
system_prompt = gr.Textbox(show_label=True, placeholder=f"System Prompt", label="System prompt", value=initial_prompt)
top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01,interactive=True, label="Top-p (nucleus sampling)",)
temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature",)
- max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="Local LLM MaxLength",)
+ max_length_sl = gr.Slider(minimum=256, maximum=8192, value=4096, step=1, interactive=True, label="Local LLM MaxLength",)
checkboxes = gr.CheckboxGroup(["基础功能区", "函数插件区", "底部输入区", "输入清除键", "插件参数区"], value=["基础功能区", "函数插件区"], label="显示/隐藏功能区")
md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)