Compare commits

...

56 Commits

Author SHA1 Message Date
e4e2430255 version 3.47 2023-07-24 19:58:47 +08:00
1732127a28 Merge pull request #979 from fenglui/master
Add chatGLM INT4 configuration support, so chatGLM can also be selected on GPUs with limited VRAM
2023-07-24 19:52:27 +08:00
56bb8b6498 improve re efficiency 2023-07-24 18:50:29 +08:00
e93b6fa3a6 Add GLM INT8 2023-07-24 18:19:57 +08:00
dd4ba0ea22 Merge branch 'master' of https://github.com/fenglui/gpt_academic into fenglui-master 2023-07-24 18:06:15 +08:00
c2701c9ce5 Merge pull request #986 from one-pr/git-clone
Clone only the latest code by default, reducing the size of git clone
2023-07-24 17:48:35 +08:00
2f019ce359 Optimize the other git clone commands in README.md 2023-07-24 15:14:48 +08:00
c5b147aeb7 Clone only the latest code by default, reducing the size of git clone 2023-07-24 15:14:42 +08:00
5813d65e52 Add chatGLM INT4 configuration support, so chatGLM can also be selected on GPUs with limited VRAM 2023-07-22 08:29:15 +08:00
a393edfaa4 ALLOW CUSTOM API KEY PATTERN 2023-07-21 22:49:07 +08:00
dd7a01cda5 Merge pull request #976 from fenglui/master
fix msg.data.split(DELIMITER) exception when msg.data is int
2023-07-21 17:02:29 +08:00
00a3b91f95 fix msg.data.split(DELIMITER) exception when msg.data is int 2023-07-21 03:51:33 +08:00
61ba544282 add latex test samples 2023-07-20 19:49:23 +08:00
b5b8c123e4 latex plugin stability improvement 2023-07-20 19:39:22 +08:00
d9ceba959f expand range after failure 2023-07-20 18:39:02 +08:00
6b5b040701 remove pdf merge 2023-07-20 18:29:06 +08:00
4f4c09a5f3 Strengthen the LaTeX repair capability 2023-07-20 18:08:22 +08:00
067bc97cce Merge branch 'interface-interlm' of https://github.com/binary-husky/chatgpt_academic into interface-interlm 2023-07-20 12:46:52 +08:00
7368580cd6 concat pdf after translation 2023-07-20 12:46:48 +08:00
df90db210c Merge branch 'master' into interface-interlm 2023-07-20 11:40:45 +08:00
0927ed20a2 edit default configuration 2023-07-20 11:39:35 +08:00
73b22f85be compat third party gpt error handle 2023-07-20 11:09:22 +08:00
b8d77557b0 Update README.md 2023-07-20 10:12:42 +08:00
99b8fce8f3 Merge pull request #965 from QQisQQ/patch-2
Fix New Bing error code 200
2023-07-19 10:15:15 +08:00
16364f1b2d Merge pull request #966 from doujiang-zheng/master
Add timestamp for chat_secrets.log and disable the verbose httpx log.
2023-07-19 10:14:36 +08:00
3b88e00cfb Add timestamp for chat_secrets.log and disable the verbose httpx log. 2023-07-19 09:43:59 +08:00
0c8c539e9b Fix New Bing error code 200
modify from 16e00af9d5

works for my issue:
```
Traceback (most recent call last):
  File "./request_llm/bridge_newbingfree.py", line 152, in run
    asyncio.run(self.async_run())
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/base_events.py", line 653, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "./request_llm/bridge_newbingfree.py", line 98, in async_run
    async for final, response in self.newbing_model.ask_stream(
  File "./request_llm/edge_gpt_free.py", line 676, in ask_stream
    async for response in self.chat_hub.ask_stream(
  File "./request_llm/edge_gpt_free.py", line 456, in ask_stream
    self.wss = await self.session.ws_connect(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/site-packages/aiohttp/client.py", line 795, in _ws_connect
    raise WSServerHandshakeError(
aiohttp.client_exceptions.WSServerHandshakeError: 200, message='Invalid response status', url=URL('wss://sydney.bing.com/sydney/ChatHub')
```
2023-07-19 04:39:15 +08:00
fd549fb986 merge success 2023-07-18 19:51:13 +08:00
babb775cfb interface with interlm 2023-07-18 16:33:34 +08:00
eef9e470c9 Fix non-UTF8 encoding errors in LaTeX 2023-07-18 11:00:20 +08:00
3002c6318a Update README.md 2023-07-17 22:21:39 +08:00
6d0bceaebd Remove plugin dependencies 2023-07-17 22:00:29 +08:00
aa51d6fde6 up 2023-07-17 21:54:28 +08:00
136479e218 Update README.md 2023-07-17 10:38:46 +08:00
19a2742354 Merge pull request #957 from 1Haschwalth/patch-1
Update README.md
2023-07-17 10:35:15 +08:00
45aac96dd3 Update README.md 2023-07-16 21:50:08 +08:00
6f21ae8939 support claude api 2023-07-16 15:03:05 +08:00
add98f4eeb Fix the automatic version upgrade bug 2023-07-16 13:23:28 +08:00
fe231f72b6 fix theme folder rename problem 2023-07-16 13:15:55 +08:00
b308fde480 update readme 2023-07-15 19:19:39 +08:00
f3e14ff806 Update the Traditional Chinese mapping dictionary 2023-07-15 19:11:00 +08:00
79ef9bdf1c update English projection dictionary 2023-07-15 19:01:49 +08:00
a3e938aee9 Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-07-15 18:41:46 +08:00
b19a6155f4 restore jittor support 2023-07-15 18:41:35 +08:00
801f7342b1 Update config.py 2023-07-15 17:58:34 +08:00
4829fa0f35 Update README.md 2023-07-15 17:46:19 +08:00
3671f4208e Update README.md 2023-07-15 17:39:04 +08:00
e8c51181ee Further improve the real-time responsiveness of speech recognition 2023-07-15 17:02:00 +08:00
3ccbb4d6fb Remove Google fonts 2023-07-15 17:01:37 +08:00
93fe457e99 Merge branch 'master' of github.com:binary-husky/chatgpt_academic 2023-07-15 16:41:46 +08:00
afac657aaa Fix the voice assistant watchdog thread leak 2023-07-15 16:41:11 +08:00
3e5c32860a Update README.md 2023-07-15 14:59:05 +08:00
d577bb38b6 Update use_audio.md 2023-07-15 14:58:27 +08:00
418bc32b39 Update use_audio.md 2023-07-15 14:53:30 +08:00
7148ea0596 Update README 2023-07-15 14:44:07 +08:00
87adb17df4 3.46 2023-07-15 14:38:18 +08:00
36 changed files with 2218 additions and 680 deletions

View File

@@ -1,5 +1,5 @@
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
name: Create and publish a Docker image for ChatGLM support
name: build-with-jittorllms
on:
push:

View File

@@ -5,7 +5,7 @@
> `pip install -r requirements.txt`
# <div align=center><img src="docs/logo.png" width="40" > GPT 学术优化 (GPT Academic)</div>
# <div align=center><img src="docs/logo.png" width="40"> GPT 学术优化 (GPT Academic)</div>
**If you like this project, please give it a Star; if you have come up with handy shortcuts or function plugins, pull requests are welcome!**
@@ -18,14 +18,14 @@ To translate this project to arbitrary language with GPT, read and run [`multi_la
>
> 2. The function of every file in this project is documented in detail in the self-analysis report [`self_analysis.md`](https://github.com/binary-husky/gpt_academic/wiki/chatgpt-academic%E9%A1%B9%E7%9B%AE%E8%87%AA%E8%AF%91%E8%A7%A3%E6%8A%A5%E5%91%8A). As versions iterate, you can also click the relevant function plugin at any time to call GPT and regenerate the project's self-analysis report. Frequently asked questions are collected in the [`wiki`](https://github.com/binary-husky/gpt_academic/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98). [Installation](#installation).
>
> 3. This project is compatible with, and encourages trying, domestic large language models such as ChatGLM and Moss. Multiple api-keys can coexist; specify them in the config file like `API_KEY="openai-key1,openai-key2,api2d-key3"`. To switch `API_KEY` temporarily, enter the temporary `API_KEY` in the input area and press Enter to submit; it takes effect immediately.
> 3. This project is compatible with, and encourages trying, domestic large language models such as ChatGLM and Moss. Multiple api-keys can coexist; specify them in the config file like `API_KEY="openai-key1,openai-key2,azure-key3,api2d-key4"`. To switch `API_KEY` temporarily, enter the temporary `API_KEY` in the input area and press Enter to submit; it takes effect immediately.
<div align="center">
Feature | Description
Feature (⭐ = recently added) | Description
--- | ---
One-click polishing | One-click polishing and one-click grammar checking for papers
One-click Chinese-English translation | One-click translation between Chinese and English
@@ -44,12 +44,13 @@ Chat analysis report generation | [Function plugin] Automatically generates a summary report after running
One-click LaTeX paper proofreading | [Function plugin] Grammarly-style grammar and spelling correction for LaTeX papers, plus a side-by-side comparison PDF
[Google Scholar integration assistant](https://www.bilibili.com/video/BV19L411U7ia) | [Function plugin] Given any Google Scholar search page URL, let gpt help you [write related works](https://www.bilibili.com/video/BV1GP411U7Az/)
Internet information aggregation + GPT | [Function plugin] One click to [let GPT fetch information from the internet](https://www.bilibili.com/video/BV1om4y127ck) before answering, so the information never goes stale
⭐Fine-grained arXiv paper translation | [Function plugin] One click to [translate arXiv papers at very high quality](https://www.bilibili.com/video/BV1dz4y1v77A/); currently the best paper translation tool
⭐Fine-grained arXiv paper translation ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [Function plugin] One click to [translate arXiv papers at very high quality](https://www.bilibili.com/video/BV1dz4y1v77A/); currently the best paper translation tool
⭐[Real-time voice conversation input](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [Function plugin] Asynchronously [listens to audio](https://www.bilibili.com/video/BV1AV4y187Uy/), segments sentences automatically, and finds the right moment to answer
Formula/image/table display | Shows both the [TeX form and the rendered form](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png) of formulas; supports formula and code highlighting
Multi-threaded function plugin support | Supports multi-threaded chatgpt calls to process [massive amounts of text](https://www.bilibili.com/video/BV1FT411H7c5/) or programs with one click
Dark [theme](https://github.com/binary-husky/gpt_academic/issues/173) on launch | Append ```/?__theme=dark``` to the browser URL to switch to the dark theme
[Multi-LLM](https://www.bilibili.com/video/BV1wT411p7yf) support | How does it feel to be served by GPT-3.5, GPT-4, [Tsinghua ChatGLM2](https://github.com/THUDM/ChatGLM2-6B), and [Fudan MOSS](https://github.com/OpenLMLab/MOSS) all at once?
ChatGLM2 fine-tuned models | Supports loading ChatGLM2 fine-tuned models; provides a ChatGLM2 fine-tuning plugin
ChatGLM2 fine-tuned models | Supports loading ChatGLM2 fine-tuned models; provides a ChatGLM2 fine-tuning auxiliary plugin
More LLM model integrations, support for [huggingface deployment](https://huggingface.co/spaces/qingxu98/gpt-academic) | Adds the Newbing interface (New Bing), introduces Tsinghua [Jittorllms](https://github.com/Jittor/JittorLLMs) supporting [LLaMA](https://github.com/facebookresearch/llama) and [PanGu-α](https://openi.org.cn/pangu/)
More new feature demos (image generation, etc.) …… | See the end of this document ……
@@ -92,7 +93,7 @@ ChatGLM2 fine-tuned models | Supports loading ChatGLM2 fine-tuned models; provides ChatGLM2 fine-tuning
1. Download the project
```sh
git clone https://github.com/binary-husky/gpt_academic.git
git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
cd gpt_academic
```
@@ -125,7 +126,7 @@ python -m pip install -r request_llm/requirements_chatglm.txt
# [Optional step II] Support Fudan MOSS
python -m pip install -r request_llm/requirements_moss.txt
git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note: this line must be run from the project root
git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss # Note: this line must be run from the project root
# [Optional step III] Make sure AVAIL_LLM_MODELS in config.py contains the expected models; all currently supported models are listed below (the jittorllms series currently only supports the docker option)
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
@@ -145,10 +146,10 @@ python main.py
1. ChatGPT only (recommended for most people; equivalent to docker-compose option 1)
[![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-without-local-llms.yml)
[![basic](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
[![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)
``` sh
git clone https://github.com/binary-husky/gpt_academic.git # download the project
git clone --depth=1 https://github.com/binary-husky/gpt_academic.git # download the project
cd gpt_academic # enter the directory
nano config.py # edit config.py with any text editor: configure "Proxy", "API_KEY", "WEB_PORT" (e.g. 50923), etc.
docker build -t gpt-academic . # install
@@ -169,6 +170,8 @@ docker-compose up
```
3. ChatGPT + LLAMA + PanGu + RWKV (requires familiarity with Docker)
[![jittorllms](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-jittorllms.yml)
``` sh
# Edit docker-compose.yml: keep option 3 and delete the other options; to configure option 3, just follow the comments in the file
docker-compose up
@@ -287,6 +290,7 @@ Tip: clicking `载入对话历史存档` without specifying a file lets you view the historical h
### II: Versions
- version 3.5 (Todo): call all of this project's function plugins using natural language (high priority)
- version 3.46: support fully hands-free real-time voice conversation
- version 3.45: support custom ChatGLM2 fine-tuned models
- version 3.44: officially support Azure; improve UI usability
- version 3.4: + arXiv paper translation and LaTeX paper proofreading
@@ -310,7 +314,7 @@ gpt_academic developer QQ group-2610599535
- The official Gradio currently has many compatibility bugs; be sure to install Gradio using `requirement.txt`
### III: Themes
The theme can be changed by modifying the `THEME` option (config.py)
1. `Chuanhu-Small-and-Beautiful` [网址](https://github.com/GaiZhenbiao/ChuanhuChatGPT/)

View File

@@ -117,7 +117,7 @@ def auto_update(raise_error=False):
with open('./version', 'r', encoding='utf8') as f:
current_version = f.read()
current_version = json.loads(current_version)['version']
if (remote_version - current_version) >= 0.01:
if (remote_version - current_version) >= 0.01-1e-5:
from colorful import print亮黄
print亮黄(
f'\n新版本可用。新版本:{remote_version},当前版本:{current_version}{new_feature}')
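The hunk above hardens the `auto_update` version check against floating-point rounding: version numbers are compared as floats, and a difference such as `3.47 - 3.46` evaluates to slightly less than 0.01, so the unmodified check could miss a real minor-version bump. A standalone sketch of the effect (values are illustrative):

```python
# Why the epsilon matters: 3.47 - 3.46 is not exactly 0.01 in binary floating point.
remote_version, current_version = 3.47, 3.46

delta = remote_version - current_version
print(delta)                 # 0.009999999999999787
print(delta >= 0.01)         # False: the naive check misses the new version
print(delta >= 0.01 - 1e-5)  # True: the epsilon absorbs the rounding error
```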

View File

@@ -32,9 +32,9 @@ else:
# ------------------------------------ The settings below can improve the experience, but in most cases do not need to be modified ------------------------------------
# URL redirection, to change the effective API_URL. Do not modify under normal circumstances!! High-risk setting! By modifying it, you completely expose your API-KEY and conversation privacy to the middleman you designate
# Format: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "fill in the redirected api.openai.com URL here"}
# Example: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":"https://reverse-proxy-url/v1/chat/completions"}
# URL redirection, to change the effective API_URL (high-risk setting! Do not modify under normal circumstances! By modifying it, you completely expose your API-KEY and conversation privacy to the middleman you designate)
# Format: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "fill in the redirected api.openai.com URL here"}
# Example: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
API_URL_REDIRECT = {}
@@ -71,7 +71,7 @@ MAX_RETRY = 2
# Model selection (note: LLM_MODEL is the model selected by default; it *must* be included in the AVAIL_LLM_MODELS list)
LLM_MODEL = "gpt-3.5-turbo" # options ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
# P.S. Other available models also include ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# P.S. Other available models also include ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# ChatGLM(2) finetune model path: if you use a ChatGLM2 fine-tuned model, add "chatglmft" to AVAIL_LLM_MODELS
@@ -80,6 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # e.g. "/home/hmp/ChatGLM2-6B/ptuning/output/6b
# Execution mode (CPU/GPU) for local LLM models such as ChatGLM
LOCAL_MODEL_DEVICE = "cpu" # alternative: "cuda"
LOCAL_MODEL_QUANT = "FP16" # default "FP16"; "INT4" enables the INT4 quantized version; "INT8" enables the INT8 quantized version
# Number of parallel gradio threads (no need to modify)
@@ -89,9 +90,11 @@ CONCURRENT_COUNT = 100
# Whether to clear the input box automatically on submit
AUTO_CLEAR_TXT = False
# Color theme; options: ["Default", "Chuanhu-Small-and-Beautiful"]
THEME = "Default"
# Add a live2d decoration
ADD_WAIFU = False
@@ -127,7 +130,15 @@ put your new bing cookies here
"""
# Alibaba Cloud real-time speech recognition; fairly hard to configure, recommended for advanced users only; see https://help.aliyun.com/document_detail/450255.html
# Alibaba Cloud real-time speech recognition; fairly hard to configure, recommended for advanced users only; see https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md
ENABLE_AUDIO = False
ALIYUN_TOKEN="" # e.g. f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY="" # e.g. RoPlZrM88DnAFkZK
ALIYUN_APPKEY="" # e.g. RoPlZrM88DnAFkZK
# Claude API KEY
ANTHROPIC_API_KEY = ""
# Custom API KEY format
CUSTOM_API_KEY_PATTERN = ""
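`CUSTOM_API_KEY_PATTERN` is the option added by commit a393edfaa4 ("ALLOW CUSTOM API KEY PATTERN"). This diff does not show where the option is consumed; presumably it lets key validation accept formats beyond the stock OpenAI shape. A hedged sketch of that idea (the validator name and fallback regex below are illustrative assumptions, not the project's actual code):

```python
import re

CUSTOM_API_KEY_PATTERN = r"sk-[a-zA-Z0-9]{48}"  # hypothetical user-supplied pattern

def looks_like_api_key(key: str) -> bool:
    # Illustrative validator: prefer the user-configured pattern when set,
    # otherwise fall back to a common OpenAI-style key shape (assumption).
    pattern = CUSTOM_API_KEY_PATTERN or r"sk-[a-zA-Z0-9]{48}"
    return re.fullmatch(pattern, key) is not None

print(looks_like_api_key("sk-" + "a" * 48))  # True
```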

View File

@@ -157,7 +157,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
@@ -234,7 +234,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])

View File

@@ -195,9 +195,12 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = "https://arxiv.org/abs/2205.14135"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
txt = r"2210.03629"
txt = r"2307.04964"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@@ -240,7 +243,7 @@ if __name__ == "__main__":
# test_数学动画生成manim()
# test_Langchain知识库()
# test_Langchain知识库读取()
# test_Latex()
test_chatglm_finetune()
test_Latex()
# test_chatglm_finetune()
input("程序完成,回车退出。")
print("退出。")

View File

@@ -1,320 +1,16 @@
from toolbox import update_ui, update_ui_lastest_msg # refresh the Gradio front-end
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
import os, shutil
import re
import numpy as np
pj = os.path.join
"""
========================================================================
Part One
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of braces so as to capture the complete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of braces so as to capture the complete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of braces so as to capture the complete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text blocks with fewer than limit_n_lines lines.
Add them to the preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
"""
========================================================================
Latex Merge File
========================================================================
"""
def 寻找Latex主文件(file_manifest, mode):
"""
Find the main file among multiple Tex documents (it must contain documentclass); return the first one found
P.S. hopefully nobody passes a latex template in here (6.25: added code to detect latex templates)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2, score the latex source files down for words that are common in latex templates but rarely appear in the body text, and return the highest-scoring one
canidates_score = []
# words that indicate a template document, used as score deductions
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # return the highest-scoring candidate
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# drop lines that are entirely comments
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # convert \include commands to \input commands
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # use a regex to find inline (trailing) comments and replace them with the empty string
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recursively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recursively
P.S. also inject CTEX to support Chinese
P.S. also strip the Latex comments
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
========================================================================
Post process
========================================================================
"""
def mod_inbraket(match):
"""
Why does chatgpt replace the commas inside cite with Chinese commas?
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace(':', ':') # the former is a Chinese (full-width) colon, the latter an ASCII colon
str_to_modify = str_to_modify.replace(',', ',') # the former is a Chinese (full-width) comma, the latter an ASCII comma
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # something went wrong; restore the original text
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # something went wrong; restore the original text
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# something went wrong; restore part of the original text to keep the braces balanced
final_tex = join_most(final_tex, node_string)
return final_tex
def split_subprocess(txt, project_folder, return_dict, opts):
"""
@@ -326,7 +22,8 @@ def split_subprocess(txt, project_folder, return_dict, opts):
mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
# absorb everything above the title and the authors
text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
# absorb \iffalse comments
text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
# absorb begin-end pairs spanning no more than 42 lines
@@ -356,77 +53,9 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
root = convert_to_linklist(text, mask)
# fix braces
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
# final processing step, for robustness
root = post_process(root)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# mask out empty lines and overly short sentences
node = root
while True:
if len(node.string.strip('\n').strip(' '))==0: node.preserve = True
if len(node.string.strip('\n').strip(' '))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# detach leading and trailing line breaks
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# write an html debug file, marking preserved areas (PRESERVE) in red and converted areas (TRANSFORM) in black
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
segment_parts_for_gpt = []
@@ -437,7 +66,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
show_html = node.string.replace('\n','<br/>')
if not node.preserve:
segment_parts_for_gpt.append(node.string)
f.write(f'<p style="color:black;">#{show_html}#</p>')
f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
else:
f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next
@@ -448,8 +77,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
return return_dict
class LatexPaperSplit():
"""
break down latex file to a linked list,
@@ -464,18 +91,32 @@ class LatexPaperSplit():
# Please do not remove or modify this warning unless you are the original author of the paper; if you are, feel free to contact the developers via the QQ group in the README
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
def merge_result(self, arr, mode, msg):
def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
"""
Merge the result after the GPT process completed
"""
result_string = ""
p = 0
node_cnt = 0
line_cnt = 0
for node in self.nodes:
if node.preserve:
line_cnt += node.string.count('\n')
result_string += node.string
else:
result_string += fix_content(arr[p], node.string)
p += 1
translated_txt = fix_content(arr[node_cnt], node.string)
begin_line = line_cnt
end_line = line_cnt + translated_txt.count('\n')
# reverse translation if any error
if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
translated_txt = node.string
result_string += translated_txt
node_cnt += 1
line_cnt += translated_txt.count('\n')
if mode == 'translate_zh':
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(result_string)
@@ -490,6 +131,7 @@ class LatexPaperSplit():
result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
return result_string
def split(self, txt, project_folder, opts):
"""
break down latex file to a linked list,
@@ -511,7 +153,6 @@ class LatexPaperSplit():
return self.sp
class LatexPaperFileGroup():
"""
use tokenizer to break down text according to max_token_limit
@@ -539,7 +180,7 @@ class LatexPaperFileGroup():
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
@@ -560,41 +201,14 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from .crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
# <-------- find the main tex file ---------->
maintex = 寻找Latex主文件(file_manifest, mode)
maintex = find_main_tex_file(file_manifest, mode)
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
time.sleep(3)
@@ -668,54 +282,51 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
# <-------- write out the files ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
final_tex = lps.merge_result(pfg.file_result, mode, msg)
objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
# <-------- collect the results and exit ---------->
chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
# <-------- return ---------->
return project_folder + f'/merge_{mode}.tex'
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]):
try:
with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
log = f.read()
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
file_lines = f.readlines()
import re
buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
buggy_lines = [int(l) for l in buggy_lines]
buggy_lines = sorted(buggy_lines)
print("removing lines that has errors", buggy_lines)
file_lines.pop(buggy_lines[0]-1)
buggy_line = buggy_lines[0]-1
print("reversing tex line that has errors", buggy_line)
# reassemble, reverting the paragraphs that caused errors
if buggy_line not in fixed_line:
fixed_line.append(buggy_line)
lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)
with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
f.writelines(file_lines)
f.write(final_tex)
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
except:
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
import os, time
current_dir = os.getcwd()
n_fix = 1
fixed_line = []
max_try = 32
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}如果程序停顿5分钟以上请直接去该路径下取回翻译结果或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
@@ -723,6 +334,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
while True:
import os
may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
shutil.copyfile(may_exist_bbl, target_bbl)
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # refresh the Gradio front-end
@@ -756,7 +371,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
# <---------- check the results ----------->
results_ = ""
original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
@@ -773,9 +387,19 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # refresh the Gradio front-end
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
# concatenate the two PDFs
if original_pdf_success:
try:
from .latex_toolbox import merge_pdfs
concat_pdf = pj(work_folder_modified, f'comparison.pdf')
merge_pdfs(origin_pdf, result_pdf, concat_pdf)
promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
except Exception as e:
pass
return True # success
else:
if n_fix>=max_try: break
@@ -787,6 +411,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
tex_name_pure=f'{main_file_modified}',
n_fix=n_fix,
work_folder_modified=work_folder_modified,
fixed_line=fixed_line
)
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # refresh the Gradio front-end
if not can_retry: break
@@ -794,4 +419,29 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
return False # failed
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from ..crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())

View File

@@ -0,0 +1,456 @@
import os, shutil
import re
import numpy as np
PRESERVE = 0
TRANSFORM = 1
pj = os.path.join
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
self.range = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
def post_process(root):
# fix braces
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# mask out empty lines and overly short sentences
node = root
while True:
if len(node.string.strip('\n').strip(' '))==0: node.preserve = True
if len(node.string.strip('\n').strip(' '))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# detach leading and trailing line breaks
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# annotate each node's line-number range
node = root
n_line = 0
expansion = 2
while True:
n_l = node.string.count('\n')
node.range = [n_line-expansion, n_line+n_l+expansion] # the range to revert on failure
n_line = n_line+n_l
node = node.next
if node is None: break
return root
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
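# Illustrative recap (not part of the original file; drawn from split_subprocess
# earlier in this changeset): the mask starts out all-TRANSFORM, and the helpers
# in this module flip protected spans back to PRESERVE, e.g.
#   mask = np.zeros(len(text), dtype=np.uint8) + TRANSFORM
#   text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)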
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of braces so as to capture the complete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of braces so as to capture the complete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text blocks with fewer than limit_n_lines lines.
Add them to the preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex Merge File
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def find_main_tex_file(file_manifest, mode):
"""
Find the main file among multiple Tex documents (it must contain documentclass); return the first one found.
P.S. hopefully nobody passes a latex template in here (6.25: added code to detect latex templates)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2, score the latex source files down for words that are common in latex templates but rarely appear in the body text, and return the highest-scoring one
canidates_score = []
# words that indicate a template document, used as score deductions
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # return the highest-scoring candidate
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# drop lines that are entirely comments
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # convert \include commands to \input commands
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # use a regex to find inline (trailing) comments and replace them with the empty string
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recursively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recursively
P.S. also inject CTEX to support Chinese
P.S. also strip the Latex comments
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Post process
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def mod_inbraket(match):
"""
Why does chatgpt replace the commas inside cite with Chinese commas?
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace(':', ':') # the former is a Chinese (full-width) colon, the latter an ASCII colon
str_to_modify = str_to_modify.replace(',', ',') # the former is a Chinese (full-width) comma, the latter an ASCII comma
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # something went wrong; restore the original text
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # something went wrong; restore the original text
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# something went wrong; restore part of the original text to keep the braces balanced
final_tex = join_most(final_tex, node_string)
return final_tex
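# Illustrative example (not part of the original file): fix_content routes every
# \command{...} through mod_inbraket, which restores ASCII punctuation that GPT
# sometimes turns full-width, e.g. the full-width comma in \cite{ref1,ref2}
# becomes an ASCII comma, yielding \cite{ref1,ref2}.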
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
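# Usage note (not part of the original file): 编译Latex above invokes this helper as
#   ok = compile_latex_with_timeout('pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
# A timeout returns False instead of raising; success is then judged by whether
# the expected PDF appears on disk afterwards.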
def merge_pdfs(pdf1_path, pdf2_path, output_path):
import PyPDF2
Percent = 0.8
# Open the first PDF file
with open(pdf1_path, 'rb') as pdf1_file:
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
# Open the second PDF file
with open(pdf2_path, 'rb') as pdf2_file:
pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
# Create a new PDF file to store the merged pages
output_writer = PyPDF2.PdfFileWriter()
# Determine the number of pages in each PDF file
num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
# Merge the pages from the two PDF files
for page_num in range(num_pages):
# Add the page from the first PDF file
if page_num < pdf1_reader.numPages:
page1 = pdf1_reader.getPage(page_num)
else:
page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Add the page from the second PDF file
if page_num < pdf2_reader.numPages:
page2 = pdf2_reader.getPage(page_num)
else:
page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Create a new empty page with double width
new_page = PyPDF2.PageObject.createBlankPage(
width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
)
new_page.mergeTranslatedPage(page1, 0, 0)
new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
output_writer.addPage(new_page)
# Save the merged PDF file
with open(output_path, 'wb') as output_file:
output_writer.write(output_file)
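Note that `merge_pdfs` is written against the legacy PyPDF2 1.x API (`PdfFileReader`, `getPage`, `numPages`, `mediaBox`), which PyPDF2 2.x deprecates and 3.x removes, so it needs an older release (e.g. `pip install "PyPDF2<2.0"`) to run as written. A minimal usage sketch with hypothetical paths, mirroring the call in 编译Latex above:

```python
# Hypothetical usage: lay the original and the translated PDF side by side.
merge_pdfs(
    pdf1_path="workfolder_original/merge.pdf",               # original compiled PDF (assumed path)
    pdf2_path="workfolder_modified/merge_translate_zh.pdf",  # translated compiled PDF (assumed path)
    output_path="workfolder_modified/comparison.pdf",        # two-up comparison, as in 编译Latex
)
```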

View File

@@ -23,7 +23,7 @@ class AliyunASR():
pass
def test_on_close(self, *args):
# print("on_close: args=>{}".format(args))
self.aliyun_service_ok = False
pass
def test_on_result_chg(self, message, *args):
@@ -50,7 +50,7 @@ class AliyunASR():
rad.clean_up()
temp_folder = tempfile.gettempdir()
TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
self.aliyun_service_ok = True
URL="wss://nls-gateway.aliyuncs.com/ws/v1"
sr = nls.NlsSpeechTranscriber(
url=URL,
@@ -86,4 +86,8 @@ class AliyunASR():
for i in slices: sr.send_audio(bytes(i))
else:
time.sleep(0.1)
if not self.aliyun_service_ok:
self.stop = True
self.stop_msg = 'Aliyun音频服务异常请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
r = sr.stop()

View File

@@ -144,11 +144,11 @@ def 下载arxiv论文并翻译摘要(txt, llm_kwargs, plugin_kwargs, chatbot, hi
# try to import dependencies; if any are missing, give installation advice
try:
import pdfminer, bs4
import bs4
except:
report_execption(chatbot, history,
a = f"解析项目: {txt}",
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade pdfminer beautifulsoup4```。")
b = f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade beautifulsoup4```。")
yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
return

View File

@@ -12,7 +12,7 @@ def write_chat_to_file(chatbot, history=None, file_name=None):
file_name = 'chatGPT对话历史' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.html'
os.makedirs('./gpt_log/', exist_ok=True)
with open(f'./gpt_log/{file_name}', 'w', encoding='utf8') as f:
from theme.theme import advanced_css
from themes.theme import advanced_css
f.write(f'<!DOCTYPE html><head><meta charset="utf-8"><title>对话历史</title><style>{advanced_css}</style></head>')
for i, contents in enumerate(chatbot):
for j, content in enumerate(contents):

View File

@@ -14,9 +14,11 @@ class WatchDog():
self.bark_fn = bark_fn
self.interval = interval
self.msg = msg
self.kill_dog = False
def watch(self):
while True:
if self.kill_dog: break
if time.time() - self.last_feed > self.timeout:
if len(self.msg) > 0: print(self.msg)
self.bark_fn()
@@ -87,6 +89,9 @@ class InterviewAssistant(AliyunASR):
def __del__(self):
self.stop = True
self.stop_msg = ""
self.commit_wd.kill_dog = True
self.plugin_wd.kill_dog = True
def init(self, chatbot):
# initialize the audio capture thread
@@ -119,7 +124,7 @@ class InterviewAssistant(AliyunASR):
self.commit_wd = WatchDog(timeout=self.commit_after_pause_n_second, bark_fn=self.no_audio_for_a_while, interval=0.2)
self.commit_wd.begin_watch()
while True:
while not self.stop:
self.event_on_result_chg.wait(timeout=0.25) # run once every 0.25 second
chatbot = self.agt.update_chatbot(chatbot) # write the worker thread's gpt results into chatbot
history = chatbot2history(chatbot)
@ -158,6 +163,8 @@ class InterviewAssistant(AliyunASR):
chatbot.append(["[请讲话]", "[正在等您说完问题]"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
if len(self.stop_msg) != 0:
raise RuntimeError(self.stop_msg)
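The hunks above add a `kill_dog` flag so the watchdog thread can be stopped when its owner is deleted, instead of spinning forever. For clarity, a compact sketch of the whole pattern as the diff implies it (not the project's verbatim class; `begin_watch` appears in the diff, while `feed` is an assumption based on the "feed the dog" wording used elsewhere in the repo):

```
import threading, time

class WatchDog():
    # Sketch of the watchdog pattern: bark_fn fires if feed() is not
    # called within `timeout` seconds; setting kill_dog lets the
    # thread exit cleanly instead of looping forever.
    def __init__(self, timeout, bark_fn, interval=0.2, msg=""):
        self.last_feed = time.time()
        self.timeout = timeout
        self.bark_fn = bark_fn
        self.interval = interval
        self.msg = msg
        self.kill_dog = False

    def watch(self):
        while True:
            if self.kill_dog: break                  # cooperative shutdown
            if time.time() - self.last_feed > self.timeout:
                if len(self.msg) > 0: print(self.msg)
                self.bark_fn()                       # timeout callback ("bark")
                break
            time.sleep(self.interval)

    def begin_watch(self):
        threading.Thread(target=self.watch, daemon=True).start()

    def feed(self):
        self.last_feed = time.time()                 # reset the countdown
```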

View File

@ -63,7 +63,7 @@ services:
version: '3'
services:
  gpt_academic_with_rwkv:
    image: fuqingxu/gpt_academic:jittorllms
    image: ghcr.io/binary-husky/gpt_academic_jittorllms:master
    environment:
      # 请查阅 `config.py` 以查看所有的配置信息
      API_KEY: ' sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,fkxxxxxx-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx '
@ -85,28 +85,13 @@ services:
    # 与宿主的网络融合
    network_mode: "host"
    # 使用代理网络拉取最新代码
    # command: >
    #   bash -c " truncate -s -1 /etc/proxychains.conf &&
    #   echo \"socks5 127.0.0.1 10880\" >> /etc/proxychains.conf &&
    #   echo '[gpt-academic] 正在从github拉取最新代码...' &&
    #   proxychains git pull &&
    #   echo '[jittorllms] 正在从github拉取最新代码...' &&
    #   proxychains git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force &&
    #   python3 -u main.py"
    # 不使用代理网络拉取最新代码
    command: >
      bash -c " echo '[gpt-academic] 正在从github拉取最新代码...' &&
      git pull &&
      pip install -r requirements.txt &&
      echo '[jittorllms] 正在从github拉取最新代码...' &&
      git --git-dir=request_llm/jittorllms/.git --work-tree=request_llm/jittorllms pull --force &&
      python3 -u main.py"
      python3 -u main.py

## ===================================================
## 【方案四】 chatgpt + Latex
## 【方案四】 ChatGPT + Latex
## ===================================================
version: '3'
services:

View File

@ -26,7 +26,7 @@ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8
RUN $useProxyNetwork python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu113
# 下载分支
WORKDIR /gpt
RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git -b jittor
RUN $useProxyNetwork git clone https://github.com/binary-husky/chatgpt_academic.git
WORKDIR /gpt/chatgpt_academic
RUN $useProxyNetwork python3 -m pip install -r requirements.txt
RUN $useProxyNetwork python3 -m pip install -r request_llm/requirements_chatglm.txt

View File

@ -13,7 +13,7 @@ RUN python3 -m pip install torch --extra-index-url https://download.pytorch.org/
# 下载分支
WORKDIR /gpt
RUN git clone https://github.com/binary-husky/chatgpt_academic.git -b jittor
RUN git clone https://github.com/binary-husky/chatgpt_academic.git
WORKDIR /gpt/chatgpt_academic
RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install -r request_llm/requirements_chatglm.txt

Binary file not shown.

View File

@ -1956,5 +1956,134 @@
"填入ENGINE": "Fill in ENGINE",
"填入api版本": "Fill in the API version",
"中文Bing版": "Chinese Bing version",
"当前支持的格式包括": "Currently supported formats include"
"当前支持的格式包括": "Currently supported formats include",
"交互功能模板函数": "InteractiveFunctionTemplateFunction",
"交互功能函数模板": "InteractiveFunctionFunctionTemplate",
"语音助手": "VoiceAssistant",
"微调数据集生成": "FineTuneDatasetGeneration",
"chatglm微调工具": "ChatGLMFineTuningTool",
"启动微调": "StartFineTuning",
"请讲话": "Please speak",
"正在听您讲话": "Listening to you",
"对这个人外貌、身处的环境、内心世界、过去经历进行描写": "Describe the appearance, environment, inner world, and past experiences of this person",
"请向下翻": "Please scroll down",
"实时音频采集": "Real-time audio collection",
"找不到": "Not found",
"在一个异步线程中采集音频": "Collect audio in an asynchronous thread",
"azure和api2d请求源": "Azure and API2D request source",
"等待ChatGLMFT响应中": "Waiting for ChatGLMFT response",
"如果使用ChatGLM2微调模型": "If using ChatGLM2 fine-tuning model",
"把文件复制过去": "Copy the file over",
"可选": "Optional",
"ChatGLMFT响应异常": "ChatGLMFT response exception",
"上传本地文件/压缩包供函数插件调用": "Upload local files/compressed packages for function plugin calls",
"例如 f37f30e0f9934c34a992f6f64f7eba4f": "For example, f37f30e0f9934c34a992f6f64f7eba4f",
"正在等您说完问题": "Waiting for you to finish the question",
"解除插件状态": "Release plugin status",
"详情见https": "See details at https",
"避免线程阻塞": "Avoid thread blocking",
"先上传数据集": "Upload dataset first",
"请直接提交即可": "Submit directly",
"Call ChatGLMFT fail 不能正常加载ChatGLMFT的参数": "Call ChatGLMFT fail, cannot load ChatGLMFT parameters",
"插件可读取“输入区”文本/路径作为参数": "The plugin can read text/path in the input area as parameters",
"给出指令": "Give instructions",
"暂不提交": "Do not submit for now",
"如 绿帽子*深蓝色衬衫*黑色运动裤": "E.g. green hat * dark blue shirt * black sports pants",
"阿里云实时语音识别 配置难度较高 仅建议高手用户使用 参考 https": "Aliyun real-time speech recognition has high configuration difficulty and is only recommended for advanced users. Refer to https",
"ChatGLMFT尚未加载": "ChatGLMFT has not been loaded yet",
"输入 clear 以清空对话历史": "Enter 'clear' to clear the conversation history",
"可以将自身的状态存储到cookie中": "You can store your own status in cookies",
"填入你亲手写的部署名": "Fill in the deployment name you wrote by yourself",
"该选项即将被弃用": "This option will be deprecated soon",
"代理网络配置": "Proxy network configuration",
"每秒采样数量": "Number of samples per second",
"使用时": "When using",
"想象一个穿着者": "Imagine a wearer",
"如果已经存在": "If it already exists",
"例如您可以将以下命令复制到下方": "For example, you can copy the following command below",
"正在锁定插件": "Locking plugin",
"使用": "Use",
"读 docs\\use_azure.md": "Read docs\\use_azure.md",
"开始最终总结": "Start final summary",
"openai的官方KEY需要伴随组织编码": "Openai's official KEY needs to be accompanied by organizational code",
"将子线程的gpt结果写入chatbot": "Write the GPT result of the sub-thread into the chatbot",
"Arixv论文精细翻译": "Fine translation of Arixv paper",
"开始接收chatglmft的回复": "Start receiving replies from chatglmft",
"请先将.doc文档转换为.docx文档": "Please convert .doc documents to .docx documents first",
"避免多用户干扰": "Avoid multiple user interference",
"清空label": "Clear label",
"解除插件锁定": "Unlock plugin",
"请以以下方式load模型": "Please load the model in the following way!!!",
"没给定指令": "No instruction given",
"100字以内": "Within 100 words",
"获取关键词": "Get keywords",
"欢迎使用 MOSS 人工智能助手!": "Welcome to use MOSS AI assistant!",
"音频助手": "Audio assistant",
"上传Latex项目": "Upload Latex project",
"对话助手函数插件": "Chat assistant function plugin",
"如果一句话小于7个字": "If a sentence is less than 7 words",
"640个字节为一组": "640 bytes per group",
"右下角更换模型菜单中可切换openai": "OpenAI can be switched in the model menu in the lower right corner",
"双手离开鼠标键盘吧": "Take your hands off the mouse and keyboard",
"先删除": "Delete first",
"如果要使用ChatGLMFT": "If you want to use ChatGLMFT",
"例如 RoPlZrM88DnAFkZK": "For example, RoPlZrM88DnAFkZK",
"提取总结": "Extract summary",
"ChatGLMFT消耗大量的内存": "ChatGLMFT consumes a lot of memory",
"格式如org-123456789abcdefghijklmno的": "In the format of org-123456789abcdefghijklmno",
"在执行完成之后": "After execution is complete",
"此处填API密钥": "Fill in the API key here",
"chatglmft 没有 sys_prompt 接口": "ChatGLMFT does not have a sys_prompt interface",
"用第二人称": "Use the second person",
"Chuanhu-Small-and-Beautiful主题": "Chuanhu-Small-and-Beautiful theme",
"请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期": "Please check if ALIYUN_TOKEN and ALIYUN_APPKEY have expired",
"还需要填写组织": "You also need to fill in the organization",
"会直接转到该函数": "Will directly jump to the function",
"初始化插件状态": "Initializing plugin status",
"插件锁定中": "Plugin is locked",
"如果这里报错": "If there is an error here",
"本地Latex论文精细翻译": "Local Latex paper fine translation",
"极少数情况下": "In very few cases",
"首先你在中文语境下通读整篇论文": "First, read the entire paper in a Chinese context",
"点击“停止”键可终止程序": "Click the 'Stop' button to terminate the program",
"建议排查": "Suggested troubleshooting",
"没有阿里云语音识别APPKEY和TOKEN": "No Aliyun voice recognition APPKEY and TOKEN",
"避免遗忘导致死锁": "Avoid forgetting to cause deadlock",
"第一次调用": "First call",
"解决插件锁定时的界面显示问题": "Solve the interface display problem when the plugin is locked",
"初始化音频采集线程": "Initialize audio capture thread",
"找不到微调模型检查点": "Cannot find fine-tuning model checkpoint",
"色彩主体": "Color theme",
"上传文件自动修正路径": "Automatically correct the path when uploading files",
"将文件添加到chatbot cookie中": "Add files to chatbot cookie",
"正常状态": "Normal state",
"建议使用英文单词": "Suggest using English words",
"Aliyun音频服务异常": "Aliyun audio service exception",
"格式如org-xxxxxxxxxxxxxxxxxxxxxxxx": "Format like org-xxxxxxxxxxxxxxxxxxxxxxxx",
"GPT 学术优化": "GPT academic optimization",
"要求": "Requirement",
"赋予插件状态": "Assign plugin status",
"等待GPT响应": "Waiting for GPT response",
"MOSS can understand and communicate fluently in the language chosen by the user such as English and 中文. MOSS can perform any language-based tasks.": "MOSS can understand and communicate fluently in the language chosen by the user such as English and Chinese. MOSS can perform any language-based tasks.",
"我将为您查找相关壁纸": "I will search for related wallpapers for you",
"当下一次用户提交时": "When the next user submits",
"赋予插件锁定 锁定插件回调路径": "Assign plugin lock, lock plugin callback path",
"处理个别特殊插件的锁定状态": "Handle the lock status of individual special plugins",
"add gpt task 创建子线程请求gpt": "Add GPT task, create sub-thread to request GPT",
"等待用户的再次调用": "Waiting for the user to call again",
"只读": "Read-only",
"用于灵活调整复杂功能的各种参数": "Various parameters used to flexibly adjust complex functions",
"输入 stop 以终止对话": "Enter stop to terminate the conversation",
"缺少ChatGLMFT的依赖": "Missing dependency of ChatGLMFT",
"找 API_ORG 设置项": "Find API_ORG setting item",
"检查config中的AVAIL_LLM_MODELS选项": "Check the AVAIL_LLM_MODELS option in config",
"对这个人外貌、身处的环境、内心世界、人设进行描写": "Describe the appearance, environment, inner world, and character of this person.",
"请输入关键词": "Please enter a keyword.",
"!!!如果需要运行量化版本": "!!! If you need to run the quantitative version.",
"为每一位访问的用户赋予一个独一无二的uuid编码": "Assign a unique uuid code to each visiting user.",
"由于提问含不合规内容被Azure过滤": "Due to Azure filtering out questions containing non-compliant content.",
"欢迎使用 MOSS 人工智能助手!输入内容即可进行对话": "Welcome to use MOSS AI assistant! Enter the content to start the conversation.",
"记住当前的label": "Remember the current label.",
"不能正常加载ChatGLMFT的参数": "Cannot load ChatGLMFT parameters normally!",
"建议直接在API_KEY处填写": "It is recommended to fill in directly at API_KEY."
}

View File

@ -150,26 +150,7 @@
"使用中文回答我的问题": "使用中文回答我的問題",
"备份一个文件": "備份一個文件",
"未知": "未知",
"如.md": "#",
"**输入参数说明**": "#",
"如果这裡拋出異常": "#",
"多線程操作已經開始": "#",
"備份和下載": "#",
"新版本可用": "#",
"將要忽略匹配的文件後綴": "#",
"可調節線程池的大小避免openai的流量限制錯誤": "#",
"使用Unsplash API": "#",
"ChatGPT綜合": "#",
"從摘要中提取高價值信息": "#",
"借助此參數": "#",
"知乎": "#",
"其他錯誤": "#",
"退出": "#",
"對話歷史寫入": "#",
"問詢記錄": "#",
"依次訪問網頁": "#",
"NewBing響應異常": "#",
"jittorllms尚未加載": "#",
"其他錯誤": "其他錯誤",
"等待NewBing响应": "等待NewBing回應",
"找不到任何CSharp文件": "找不到任何CSharp檔案",
"插件demo": "插件範例",
@ -300,12 +281,12 @@
"上傳本地文件可供紅色函數插件調用": "上傳本地文件供紅色函數插件調用",
"生成圖像": "生成圖像",
"追加歷史": "追加歷史",
"網絡代理狀態": "網代理狀態",
"網絡代理狀態": "網代理狀態",
"不需要再次轉化": "不需要再次轉換",
"帶超時倒計時": "帶有超時倒數計時",
"保存當前對話": "儲存目前對話",
"等待響應": "等待回應",
"依賴檢測通過": "依賴檢通過",
"依賴檢測通過": "依賴檢通過",
"如果要使用ChatGLM": "如果要使用ChatGLM",
"對IPynb文件進行解析": "對IPynb檔案進行解析",
"先切換模型到openai或api2d": "先切換模型到openai或api2d",
@ -411,7 +392,7 @@
"中转网址预览": "中轉網址預覽",
"自动截断": "自動截斷",
"当無法用標點、空行分割時": "當無法用標點、空行分割時",
"意外Json結構": "意外Json結構",
"意外Json結構": "意外Json結構",
"需要讀取和清理文本的pdf文件路徑": "需要讀取和清理文本的pdf文件路徑",
"HotReload的裝飾器函數": "HotReload的裝飾器函數",
"chatGPT 分析報告": "chatGPT 分析報告",
@ -423,7 +404,7 @@
"這個bug沒找到觸發條件": "這個bug沒找到觸發條件",
"喚起高級參數輸入區": "喚起高級參數輸入區",
"但大部分場合下並不需要修改": "但大部分場合下並不需要修改",
"盡量是完整的一個section": "盡量完整的一個section",
"盡量是完整的一個section": "盡量選擇完整的一個章節",
"如果OpenAI不響應": "如果OpenAI不響應",
"等文本特殊符號轉換為其基本形式來對文本進行歸一化處理": "等文本特殊符號轉換為其基本形式來對文本進行歸一化處理",
"你的回答必須簡單明了": "你的回答必須簡單明了",
@ -517,7 +498,7 @@
"正在提取摘要並下載PDF文檔……": "正在提取摘要並下載PDF文件……",
"1. 對原始文本進行歸一化處理": "1. 正規化原始文本",
"問題": "問題",
"用於基礎的對話功能": "基本對話功能",
"用於基礎的對話功能": "用於基礎的對話功能",
"獲取設置": "獲取設置",
"如果缺少依賴": "如果缺少依賴項",
"第6步": "第6步",
@ -1111,26 +1092,9 @@
"清理规则包括": "清理規則包括",
"新版配置": "新版配置",
"如果有": "如果有",
"高級參數輸入區": "#",
"您提供的api-key不滿足要求": "#",
"“喂狗”": "#",
"有線程鎖": "#",
"解析整個CSharp項目": "#",
"上下文管理器必須實現兩個方法": "#",
"Call MOSS fail 不能正常加載MOSS的參數": "#",
"獲取圖片URL": "#",
"輸入部分太自由": "#",
"Not enough point. API2D賬戶點數不足": "#",
"網絡錯誤": "#",
"請開始多線程操作": "#",
"authors獲取失敗": "#",
"、地址": "#",
"根據以上分析": "#",
"1、英文題目2、中文題目翻譯3、作者4、arxiv公開": "#",
"一些普通功能模塊": "#",
"參數簡單": "#",
"具備以下功能": "#",
"優先級2. 獲取config_private中的配置": "#",
"Call MOSS fail 不能正常加載MOSS的參數": "Call MOSS fail 不能正常加載MOSS的參數",
"根據以上分析": "根據以上分析",
"一些普通功能模塊": "一些普通功能模塊",
"汇总报告如何远程获取": "如何遠程獲取匯總報告",
"热更新prompt": "熱更新提示",
"插件调度异常": "插件調度異常",
@ -1191,26 +1155,9 @@
"函数插件区": "函數插件區",
"*** API_KEY 导入成功": "*** API_KEY 導入成功",
"请对下面的程序文件做一个概述文件名是": "請對下面的程序文件做一個概述文件名是",
"替換跨行的連詞": "#",
"內容太長了都會觸發token數量溢出的錯誤": "#",
"尚未完成全部響應": "#",
"生成帶有段落標籤的HTML代碼": "#",
"函數熱更新是指在不停止程序運行的情況下": "#",
"將Unsplash API中的PUT_YOUR_QUERY_HERE替換成描述該事件的一個最重要的單詞": "#",
"沒有提供高級參數功能說明": "#",
"條": "#",
"請刷新界面重試": "#",
"和openai的連接容易斷掉": "#",
"使用 Unsplash API": "#",
"完成情況": "#",
"迭代上一次的結果": "#",
"每個線程都要“餵狗”": "#",
"最多收納多少個網頁的結果": "#",
"日": "#",
"第4步": "#",
"找不到任何python文件": "#",
"經過充分測試": "#",
"缺少的依賴": "#",
"內容太長了都會觸發token數量溢出的錯誤": "內容太長了都會觸發token數量溢出的錯誤",
"沒有提供高級參數功能說明": "未提供高級參數功能說明",
"和openai的連接容易斷掉": "和openai的連接容易斷掉",
"分组+迭代处理": "分組+迭代處理",
"安装Newbing的依赖": "安裝Newbing的依賴",
"批": "批",
@ -1511,5 +1458,760 @@
"包括": "包括",
"或者": "或者",
"并执行函数的新版本": "並執行函數的新版本",
"论文": "論文"
"论文": "論文",
"解析一个Golang项目": "ParseAGolangProject",
"Latex英文纠错": "LatexEnglishCorrection",
"连接bing搜索回答问题": "ConnectToBingSearchForAnswer",
"联网的ChatGPT_bing版": "ChatGPT_BingVersionOnline",
"总结音视频": "SummarizeAudioAndVideo",
"动画生成": "GenerateAnimations",
"数学动画生成manim": "GenerateMathematicalAnimationsWithManim",
"Markdown翻译指定语言": "TranslateMarkdownToSpecifiedLanguage",
"知识库问答": "KnowledgeBaseQA",
"Langchain知识库": "LangchainKnowledgeBase",
"读取知识库作答": "ReadKnowledgeBaseAndAnswerQuestions",
"交互功能模板函数": "InteractiveFunctionTemplateFunctions",
"交互功能函数模板": "InteractiveFunctionFunctionTemplates",
"Latex英文纠错加PDF对比": "LatexEnglishCorrectionWithPDFComparison",
"Latex输出PDF结果": "OutputPDFFromLatex",
"Latex翻译中文并重新编译PDF": "TranslateLatexToChineseAndRecompilePDF",
"语音助手": "VoiceAssistant",
"微调数据集生成": "FineTuneDatasetGeneration",
"chatglm微调工具": "ChatGLM_FineTuningTool",
"启动微调": "StartFineTuning",
"sprint亮靛": "SprintLiangDian",
"寻找Latex主文件": "FindLatexMainFile",
"专业词汇声明": "ProfessionalTerminologyDeclaration",
"Latex精细分解与转化": "LatexFineDecompositionAndConversion",
"编译Latex": "CompileLatex",
"正在等您说完问题": "正在等您說完問題",
"最多同时执行5个": "最多同時執行5個",
"将文件复制一份到下载区": "將檔案複製一份到下載區",
"您接下来不能再使用其他插件了": "您接下來不能再使用其他插件了",
"如 绿帽子*深蓝色衬衫*黑色运动裤": "如 綠帽子*深藍色襯衫*黑色運動褲",
"首先你在中文语境下通读整篇论文": "首先您在中文語境下通讀整篇論文",
"根据给定的切割时长将音频文件切割成多个片段": "根據給定的切割時長將音訊檔切割成多個片段",
"接下来两句话只显示在界面上": "接下來兩句話只顯示在介面上",
"清空label": "清空標籤",
"正在尝试自动安装": "正在嘗試自動安裝",
"MOSS消耗大量的内存": "MOSS消耗大量的記憶體",
"如果这里报错": "如果這裡報錯",
"其他类型文献转化效果未知": "其他類型文獻轉換效果未知",
"ChatGPT综合": "ChatGPT綜合",
"音频文件的路径": "音訊檔案的路徑",
"执行错误": "執行錯誤",
"因此选择GenerateImage函数": "因此選擇GenerateImage函數",
"从摘要中提取高价值信息": "從摘要中提取高價值資訊",
"使用英文": "使用英文",
"是否在提交时自动清空输入框": "是否在提交時自動清空輸入框",
"生成数学动画": "生成數學動畫",
"正在加载Claude组件": "正在載入Claude元件",
"参数说明": "參數說明",
"建议排查": "建議排查",
"将消耗较长时间下载中文向量化模型": "將消耗較長時間下載中文向量化模型",
"test_LangchainKnowledgeBase读取": "test_LangchainKnowledgeBase讀取",
"安装Claude的依赖": "安裝Claude的相依性",
"以下所有配置也都支持利用环境变量覆写": "以下所有配置也都支持利用環境變數覆寫",
"需要被切割的音频文件名": "需要被切割的音頻文件名",
"保存当前对话": "保存當前對話",
"功能、贡献者": "功能、貢獻者",
"Chuanhu-Small-and-Beautiful主题": "Chuanhu-小而美主題",
"等待Claude响应": "等待Claude響應",
"其他模型转化效果未知": "其他模型轉換效果未知",
"版权归原文作者所有": "版權歸原文作者所有",
"回答完问题后": "回答完問題後",
"请先上传文件素材": "請先上傳文件素材",
"上传本地文件/压缩包供函数插件调用": "上傳本地文件/壓縮包供函數插件調用",
"P.S. 顺便把Latex的注释去除": "P.S. 順便把Latex的註釋去除",
"您提供的api-key不满足要求": "您提供的api-key不滿足要求",
"切割音频文件": "切割音頻文件",
"对不同latex源文件扣分": "對不同latex源文件扣分",
"以下是一篇学术论文的基础信息": "以下是一篇學術論文的基礎信息",
"问题": "問題",
"待注入的知识库名称id": "待注入的知識庫名稱id",
"”的主要内容": "”的主要內容",
"获取设置": "獲取設置",
"str类型": "str類型",
"多线程": "多線程",
"尝试执行Latex指令失败": "嘗試執行Latex指令失敗",
"然后再写一段英文摘要": "然後再寫一段英文摘要",
"段音频的主要内容": "段音頻的主要內容",
"临时地激活代理网络": "臨時地激活代理網絡",
"网络的远程文件": "網絡的遠程文件",
"不能正常加载ChatGLMFT的参数": "無法正常載入ChatGLMFT的參數",
"正在编译PDF文档": "正在編譯PDF文件",
"等待ChatGLMFT响应中": "等待ChatGLMFT回應中",
"将": "將",
"片段": "片段",
"修复括号": "修復括號",
"条": "條",
"建议直接在API_KEY处填写": "建議直接在API_KEY處填寫",
"根据需要切换prompt": "根據需要切換prompt",
"使用": "使用",
"请输入要翻译成哪种语言": "請輸入要翻譯成哪種語言",
"实际得到格式": "實際得到格式",
"例如 f37f30e0f9934c34a992f6f64f7eba4f": "例如 f37f30e0f9934c34a992f6f64f7eba4f",
"请切换至“KnowledgeBaseQA”插件进行知识库访问": "請切換至“KnowledgeBaseQA”插件進行知識庫訪問",
"用户填3": "用戶填3",
"远程云服务器部署": "遠程雲服務器部署",
"未知指令": "未知指令",
"每个线程都要“喂狗”": "每個線程都要“喂狗”",
"该项目的Latex主文件是": "該項目的Latex主文件是",
"设置OpenAI密钥和模型": "設置OpenAI密鑰和模型",
"填入你亲手写的部署名": "填入你親手寫的部署名",
"仅调试": "僅調試",
"依赖不足": "依賴不足",
"右下角更换模型菜单中可切换openai": "右下角更換模型菜單中可切換openai",
"解析整个CSharp项目": "解析整個CSharp項目",
"唤起高级参数输入区": "喚起高級參數輸入區",
"这个bug没找到触发条件": "這個bug沒找到觸發條件",
"========================================= 插件主程序2 =====================================================": "========================================= 插件主程序2 =====================================================",
"经过充分测试": "經過充分測試",
"该文件中主要包含三个函数": "該文件中主要包含三個函數",
"您可以到Github Issue区": "您可以到Github Issue區",
"避免线程阻塞": "避免線程阻塞",
"吸收iffalse注释": "吸收iffalse註釋",
"from crazy_functions.虚空终端 import 终端": "from crazy_functions.虛空終端 import 終端",
"异步方法": "異步方法",
"块元提取": "塊元提取",
"Your account is not active. OpenAI以账户失效为由": "您的帳戶未啟用。OpenAI以帳戶失效為由",
"还原部分原文": "還原部分原文",
"如果要使用Claude": "如果要使用Claude",
"把文件复制过去": "把文件複製過去",
"解压失败! 需要安装pip install rarfile来解压rar文件": "解壓失敗需要安裝pip install rarfile來解壓rar文件",
"正在锁定插件": "正在鎖定插件",
"输入 clear 以清空对话历史": "輸入 clear 以清空對話歷史",
"P.S. 但愿没人把latex模板放在里面传进来": "P.S. 但願沒人把latex模板放在裡面傳進來",
"实时音频采集": "實時音頻採集",
"开始最终总结": "開始最終總結",
"拒绝服务": "拒絕服務",
"配置教程&视频教程": "配置教程&視頻教程",
"所有音频都总结完成了吗": "所有音頻都總結完成了嗎",
"返回": "返回",
"避免不小心传github被别人看到": "避免不小心傳github被別人看到",
"否则将导致每个人的Claude问询历史互相渗透": "否則將導致每個人的Claude問詢歷史互相滲透",
"提问吧! 但注意": "提問吧!但注意",
"待处理的word文档路径": "待處理的word文檔路徑",
"欢迎加REAME中的QQ联系开发者": "歡迎加REAME中的QQ聯繫開發者",
"建议暂时不要使用": "建議暫時不要使用",
"Latex没有安装": "Latex沒有安裝",
"在这里放一些网上搜集的demo": "在這裡放一些網上搜集的demo",
"实现消息发送、接收等功能": "實現消息發送、接收等功能",
"用于与with语句一起使用": "用於與with語句一起使用",
"解压失败! 需要安装pip install py7zr来解压7z文件": "解壓失敗! 需要安裝pip install py7zr來解壓7z文件",
"借助此参数": "借助此參數",
"判定为数据流的结束": "判定為數據流的結束",
"提取文件扩展名": "提取文件擴展名",
"GPT结果已输出": "GPT結果已輸出",
"读取文件": "讀取文件",
"如果OpenAI不响应": "如果OpenAI不響應",
"输入部分太自由": "輸入部分太自由",
"用于给一小段代码上代理": "用於給一小段代碼上代理",
"输入 stop 以终止对话": "輸入 stop 以終止對話",
"这个paper有个input命令文件名大小写错误": "這個paper有個input命令文件名大小寫錯誤",
"等待Claude回复的片段": "等待Claude回復的片段",
"开始": "開始",
"将根据报错信息修正tex源文件并重试": "將根據報錯信息修正tex源文件並重試",
"建议更换代理协议": "建議更換代理協議",
"递归地切割PDF文件": "遞歸地切割PDF文件",
"读 docs\\use_azure.md": "讀 docs\\use_azure.md",
"参数": "參數",
"屏蔽空行和太短的句子": "屏蔽空行和太短的句子",
"分析上述回答": "分析上述回答",
"因为在同一个频道里存在多人使用时历史消息渗透问题": "因為在同一個頻道裡存在多人使用時歷史消息滲透問題",
"使用latexdiff生成論文轉化前後對比": "使用latexdiff生成論文轉化前後對比",
"檢查結果": "檢查結果",
"請在此處追加更細緻的校錯指令": "請在此處追加更細緻的校錯指令",
"報告如何遠程獲取": "報告如何遠程獲取",
"發現已經存在翻譯好的PDF文檔": "發現已經存在翻譯好的PDF文檔",
"插件鎖定中": "插件鎖定中",
"正在精細切分latex文件": "正在精細切分latex文件",
"數學GenerateAnimations": "數學GenerateAnimations",
"上傳文件自動修正路徑": "上傳文件自動修正路徑",
"請檢查ALIYUN_TOKEN和ALIYUN_APPKEY是否過期": "請檢查ALIYUN_TOKEN和ALIYUN_APPKEY是否過期",
"上傳Latex項目": "上傳LaTeX項目",
"Aliyun音頻服務異常": "Aliyun音頻服務異常",
"為了防止大語言模型的意外謬誤產生擴散影響": "為了防止大語言模型的意外謬誤產生擴散影響",
"調用Claude時": "調用Claude時",
"解除插件鎖定": "解除插件鎖定",
"暗色模式 / 亮色模式": "暗色模式 / 亮色模式",
"只有第二步成功": "只有第二步成功",
"分析结果": "分析結果",
"用第二人称": "使用第二人稱",
"详情见https": "詳情請見https",
"记住当前的label": "記住當前的標籤",
"当无法用标点、空行分割时": "當無法用標點符號、空行分割時",
"如果分析错误": "如果分析錯誤",
"如果有必要": "如果有必要",
"不要修改!! 高危设置!通过修改此设置": "不要修改!! 高危設置!通過修改此設置",
"ChatGLMFT消耗大量的内存": "ChatGLMFT消耗大量的內存",
"摘要生成后的文档路径": "摘要生成後的文件路徑",
"对全文进行概括": "對全文進行概述",
"LLM_MODEL是默认选中的模型": "LLM_MODEL是默認選中的模型",
"640个字节为一组": "640個字節為一組",
"获取关键词": "獲取關鍵詞",
"解析为简体中文": "解析為簡體中文",
"将 \\include 命令转换为 \\input 命令": "將 \\include 命令轉換為 \\input 命令",
"默认值为1000": "默認值為1000",
"手动指定语言": "手動指定語言",
"请登录OpenAI查看详情 https": "請登錄OpenAI查看詳情 https",
"尝试第": "嘗試第",
"每秒采样数量": "每秒採樣數量",
"加载失败!": "加載失敗!",
"方法": "方法",
"对这个人外貌、身处的环境、内心世界、过去经历进行描写": "對這個人外貌、身處的環境、內心世界、過去經歷進行描寫",
"请先将.doc文档转换为.docx文档": "請先將.doc文檔轉換為.docx文檔",
"定位主Latex文件": "定位主Latex文件",
"批量SummarizeAudioAndVideo": "批量摘要音视频",
"终端": "終端",
"即将退出": "即將退出",
"找不到": "找不到",
"正在听您讲话": "正在聆聽您講話",
"请您不要删除或修改这行警告": "請勿刪除或修改此警告",
"没有阿里云语音识别APPKEY和TOKEN": "沒有阿里雲語音識別APPKEY和TOKEN",
"临时地启动代理网络": "臨時啟動代理網絡",
"请尝试把以下指令复制到高级参数区": "請將以下指令複製到高級參數區",
"中文Bing版": "中文Bing版",
"计算文件总时长和切割点": "計算文件總時長和切割點",
"寻找主文件": "尋找主文件",
"jittorllms尚未加载": "jittorllms尚未加載",
"使用正则表达式查找半行注释": "使用正則表達式查找半行註釋",
"文档越长耗时越长": "文檔越長耗時越長",
"生成中文PDF": "生成中文PDF",
"写入文件": "寫入文件",
"第三组插件": "第三組插件",
"开始接收chatglmft的回复": "開始接收chatglmft的回覆",
"由于提问含不合规内容被Azure过滤": "由於提問含不合規內容被Azure過濾",
"安装方法https": "安裝方法https",
"是否自动处理token溢出的情况": "是否自動處理token溢出的情況",
"如果需要使用AZURE 详情请见额外文档 docs\\use_azure.md": "如果需要使用AZURE 詳情請見額外文檔 docs\\use_azure.md",
"将要忽略匹配的文件后缀": "將要忽略匹配的文件後綴",
"authors获取失败": "authors獲取失敗",
"发送到openai音频解析终端": "發送到openai音頻解析終端",
"请开始多线程操作": "請開始多線程操作",
"对这个人外貌、身处的环境、内心世界、人设进行描写": "對這個人外貌、身處的環境、內心世界、人設進行描寫",
"MOSS can understand and communicate fluently in the language chosen by the user such as English and 中文. MOSS can perform any language-based tasks.": "MOSS可以流利地理解和使用用戶選擇的語言例如英語和中文。MOSS可以執行任何基於語言的任務。",
"work_folder = Latex預處理": "設置工作目錄為Latex預處理",
"然後轉移到指定的另一個路徑中": "然後轉移到指定的另一個路徑中",
"使用Newbing": "使用Newbing",
"詳情信息見requirements.txt": "詳細信息請參閱requirements.txt",
"開始下載": "開始下載",
"多線程翻譯開始": "多線程翻譯開始",
"當前大語言模型": "當前大語言模型",
"格式如org-123456789abcdefghijklmno的": "格式如org-123456789abcdefghijklmno的",
"當下一次用戶提交時": "當下一次用戶提交時",
"需要特殊依賴": "需要特殊依賴",
"次編譯": "次編譯",
"先上傳數據集": "先上傳數據集",
"gpt寫的": "gpt寫的",
"調用緩存": "調用緩存",
"优先级1. 获取环境变量作为配置": "優先級1. 獲取環境變量作為配置",
"检查config中的AVAIL_LLM_MODELS选项": "檢查config中的AVAIL_LLM_MODELS選項",
"并且对于网络上的文件": "並且對於網絡上的文件",
"根据文本使用GPT模型生成相应的图像": "根據文本使用GPT模型生成相應的圖像",
"功能描述": "功能描述",
"翻译结果": "翻譯結果",
"需要预先pip install rarfile": "需要預先pip install rarfile",
"等待响应": "等待響應",
"我们剥离Introduction之后的部分": "我們剝離Introduction之後的部分",
"函数插件-固定按钮区": "函數插件-固定按鈕區",
"临时存储用于调试": "臨時存儲用於調試",
"比正文字体小": "比正文字體小",
"会直接转到该函数": "會直接轉到該函數",
"请以以下方式load模型": "請以以下方式load模型",
"请输入关键词": "請輸入關鍵詞",
"返回找到的第一个": "返回找到的第一個",
"高级参数输入区": "高級參數輸入區",
"精细切分latex文件": "精細切分latex文件",
"赋予插件锁定 锁定插件回调路径": "賦予插件鎖定 鎖定插件回調路徑",
"尝试下载": "嘗試下載",
"包含documentclass关键字": "包含documentclass關鍵字",
"在一个异步线程中采集音频": "在一個異步線程中採集音頻",
"先删除": "先刪除",
"则跳过GPT请求环节": "則跳過GPT請求環節",
"Not enough point. API2D账户点数不足": "Not enough point. API2D帳戶點數不足",
"如果一句话小于7个字": "如果一句話小於7個字",
"具备以下功能": "具備以下功能",
"请查看终端的输出或耐心等待": "請查看終端的輸出或耐心等待",
"对输入的word文档进行摘要生成": "對輸入的word文檔進行摘要生成",
"只读": "只讀",
"文本碎片重组为完整的tex文件": "文本碎片重組為完整的tex文件",
"通过调用conversations_open方法打开一个频道": "通過調用conversations_open方法打開一個頻道",
"对话历史文件损坏!": "對話歷史文件損壞!",
"再失败就没办法了": "再失敗就沒辦法了",
"原始PDF编译是否成功": "原始PDF編譯是否成功",
"不能正常加载jittorllms的参数": "不能正常加載jittorllms的參數",
"正在编译对比PDF": "正在編譯對比PDF",
"找不到微调模型检查点": "找不到微調模型檢查點",
"将生成的报告自动投射到文件上传区": "將生成的報告自動投射到文件上傳區",
"请对这部分内容进行语法矫正": "請對這部分內容進行語法校正",
"编译已经开始": "編譯已經開始",
"需要读取和清理文本的pdf文件路径": "需要讀取和清理文本的pdf文件路徑",
"读取文件内容到内存": "讀取文件內容到內存",
"用&符号分隔": "用&符號分隔",
"输入arxivID": "輸入arxivID",
"找 API_ORG 设置项": "找API_ORG設置項",
"分析用户提供的谷歌学术": "分析用戶提供的谷歌學術",
"欢迎使用 MOSS 人工智能助手!输入内容即可进行对话": "歡迎使用 MOSS 人工智能助手!輸入內容即可進行對話",
"段音频的第": "段音頻的第",
"没有找到任何可读取文件": "沒有找到任何可讀取文件",
"目前仅支持GPT3.5/GPT4": "目前僅支持GPT3.5/GPT4",
"为每一位访问的用户赋予一个独一无二的uuid编码": "為每一位訪問的用戶賦予一個獨一無二的uuid編碼",
"内含已经翻译的Tex文档": "內含已經翻譯的Tex文檔",
"消耗时间的函数": "消耗時間的函數",
"成功啦": "成功啦",
"环境变量配置格式见docker-compose.yml": "環境變量配置格式見docker-compose.yml",
"将每次对话记录写入Markdown格式的文件中": "將每次對話記錄寫入Markdown格式的文件中",
"报告已经添加到右侧“文件上传区”": "報告已經添加到右側“文件上傳區”",
"此处可以输入解析提示": "此處可以輸入解析提示",
"缺少MOSS的依赖": "缺少MOSS的依賴",
"仅在Windows系统进行了测试": "僅在Windows系統進行了測試",
"然后重启程序": "然後重啟程序",
"此处不修改": "此處不修改",
"输出html调试文件": "輸出html調試文件",
"6.25 加入判定latex模板的代码": "6.25 加入判定latex模板的代碼",
"提取总结": "提取總結",
"要求": "要求",
"由于最为关键的转化PDF编译失败": "由於最為關鍵的轉化PDF編譯失敗",
"除非您是论文的原作者": "除非您是論文的原作者",
"输入问题后点击该插件": "輸入問題後點擊該插件",
"该选项即将被弃用": "該選項即將被棄用",
"再列出用户可能提出的三个问题": "再列出用戶可能提出的三個問題",
"所有文件都总结完成了吗": "所有文件都總結完成了嗎",
"请稍候": "請稍候",
"向chatbot中添加简单的意外错误信息": "向chatbot中添加簡單的意外錯誤信息",
"快捷的调试函数": "快捷的調試函數",
"LatexEnglishCorrection+高亮修正位置": "Latex英文校正+高亮修正位置",
"循环监听已打开频道的消息": "循環監聽已打開頻道的消息",
"将指定目录下的PDF文件从英文翻译成中文": "將指定目錄下的PDF文件從英文翻譯成中文",
"请对下面的音频片段做概述": "請對下面的音頻片段做概述",
"openai的官方KEY需要伴隨组织编码": "openai的官方KEY需要伴隨組織編碼",
"表示频道ID": "頻道ID",
"当前支持的格式包括": "目前支援的格式包括",
"只有GenerateImage和生成图像相关": "僅限GenerateImage和生成圖像相關",
"删除中间文件夹": "刪除中間資料夾",
"解除插件状态": "解除插件狀態",
"正在预热文本向量化模组": "正在預熱文本向量化模組",
"100字以内": "限制100字內",
"如果缺少依赖": "如果缺少相依性",
"寻找主tex文件": "尋找主要tex檔案",
"gpt 多线程请求": "gpt 多線程請求",
"已知某些代码的局部作用是": "已知某些程式碼的局部作用是",
"--读取文件": "--讀取檔案",
"前面是中文冒号": "前面是中文冒號",
"*{\\scriptsize\\textbf{警告": "*{\\scriptsize\\textbf{警告",
"OpenAI所允许的最大并行过载": "OpenAI所允許的最大並行過載",
"请直接去该路径下取回翻译结果": "請直接前往該路徑取回翻譯結果",
"以免输入溢出": "以免輸入溢出",
"把某个路径下所有文件压缩": "壓縮某個路徑下的所有檔案",
"问询记录": "詢問記錄",
"Tex源文件缺失": "Tex原始檔案遺失",
"当前参数": "目前參數",
"处理markdown文本格式的转变": "處理markdown文本格式的轉換",
"尝试加载": "嘗試載入",
"请在此处给出自定义翻译命令": "請在此處提供自訂翻譯命令",
"这需要一段时间计算": "這需要一段時間計算",
"-构建知识库": "-建立知識庫",
"还需要填写组织": "還需要填寫組織",
"当前知识库内的有效文件": "當前知識庫內的有效文件",
"第一次调用": "第一次調用",
"从一批文件": "從一批文件",
"json等": "json等",
"翻译-": "翻譯-",
"编译文献交叉引用": "編譯文獻交叉引用",
"优先级2. 获取config_private中的配置": "優先級2. 獲取config_private中的配置",
"可选": "可選",
"我们": "我們",
"编译结束": "編譯結束",
"或代理节点": "或代理節點",
"chatGPT 分析报告": "chatGPT 分析報告",
"调用openai api 使用whisper-1模型": "調用openai api 使用whisper-1模型",
"这段代码定义了一个名为TempProxy的空上下文管理器": "這段代碼定義了一個名為TempProxy的空上下文管理器",
"生成的视频文件路径": "生成的視頻文件路徑",
"请直接提交即可": "請直接提交即可",
"=================================== 工具函数 ===============================================": "=================================== 工具函數 ===============================================",
"报错信息如下. 如果是与网络相关的问题": "報錯信息如下. 如果是與網絡相關的問題",
"python 版本建议3.9+": "python 版本建議3.9+",
"多线程函数插件中": "多線程函數插件中",
"对话助手函数插件": "對話助手函數插件",
"或者重启之后再度尝试": "或者重啟之後再度嘗試",
"拆分过长的latex片段": "拆分過長的latex片段",
"调用whisper模型音频转文字": "調用whisper模型音頻轉文字",
"失败啦": "失敗啦",
"正在编译PDF": "正在編譯PDF",
"请刷新界面重试": "請刷新界面重試",
"模型参数": "模型參數",
"写出文件": "寫出文件",
"第二组插件": "第二組插件",
"在多Tex文档中": "在多Tex文檔中",
"有线程锁": "有線程鎖",
"释放线程锁": "釋放線程鎖",
"读取优先级": "讀取優先級",
"Linux下必须使用Docker安装": "Linux下必須使用Docker安裝",
"例如您可以将以下命令复制到下方": "例如您可以將以下命令複製到下方",
"导入依赖失败": "導入依賴失敗",
"给出一些判定模板文档的词作为扣分项": "給出一些判定模板文檔的詞作為扣分項",
"等待Claude响应中": "等待Claude響應中",
"Call ChatGLMFT fail 不能正常加载ChatGLMFT的参数": "Call ChatGLMFT fail 不能正常加載ChatGLMFT的參數",
"但本地存储了以下历史文件": "但本地存儲了以下歷史文件",
"如果存在调试缓存文件": "如果存在調試緩存文件",
"如果这里抛出异常": "如果這裡拋出異常",
"详见项目主README.md": "詳見項目主README.md",
"作者": "作者",
"现在您点击任意“红颜色”标识的函数插件时": "現在您點擊任意“紅顏色”標識的函數插件時",
"上下文管理器必须实现两个方法": "上下文管理器必須實現兩個方法",
"匹配^数字^": "匹配^數字^",
"也是可读的": "也是可讀的",
"将音频解析为简体中文": "將音頻解析為簡體中文",
"依次访问网页": "依次訪問網頁",
"P.S. 顺便把CTEX塞进去以支持中文": "P.S. 順便把CTEX塞進去以支持中文",
"NewBing响应异常": "NewBing響應異常",
"获取已打开频道的最新消息并返回消息列表": "獲取已打開頻道的最新消息並返回消息列表",
"请使用Markdown": "請使用Markdown",
"例如 RoPlZrM88DnAFkZK": "例如 RoPlZrM88DnAFkZK",
"编译BibTex": "編譯BibTex",
"Claude失败": "Claude失敗",
"请更换为API_URL_REDIRECT配置": "請更換為API_URL_REDIRECT配置",
"P.S. 其他可用的模型还包括": "P.S. 其他可用的模型還包括",
"色彩主体": "色彩主體",
"后面是英文逗号": "後面是英文逗號",
"下载pdf文件未成功": "下載pdf文件未成功",
"删除整行的空注释": "刪除整行的空注釋",
"吸收匿名公式": "吸收匿名公式",
"从而更全面地理解项目的整体功能": "從而更全面地理解項目的整體功能",
"不需要再次转化": "不需要再次轉化",
"可以将自身的状态存储到cookie中": "可以將自身的狀態存儲到cookie中",
"1、英文题目2、中文题目翻译3、作者4、arxiv公开": "1、英文題目2、中文題目翻譯3、作者4、arxiv公開",
"GPT 学术优化": "GPT 學術優化",
"解析整个Python项目": "解析整個Python項目",
"吸收其他杂项": "吸收其他雜項",
"-预热文本向量化模组": "-預熱文本向量化模組",
"Claude组件初始化成功": "Claude組件初始化成功",
"此处填API密钥": "此處填API密鑰",
"请继续分析其他源代码": "請繼續分析其他源代碼",
"质能方程式": "質能方程式",
"功能尚不稳定": "功能尚不穩定",
"使用教程详情见 request_llm/README.md": "使用教程詳情見 request_llm/README.md",
"从以上搜索结果中抽取信息": "從以上搜索結果中抽取信息",
"虽然PDF生成失败了": "雖然PDF生成失敗了",
"找图片": "尋找圖片",
"还原原文": "還原原文",
"可调节线程池的大小避免openai的流量限制错误": "可調整線程池大小以避免openai流量限制錯誤",
"正在提取摘要并下载PDF文档……": "正在提取摘要並下載PDF文件......",
"缺少ChatGLMFT的依赖": "缺少ChatGLMFT的依賴",
"不会实时显示在界面上": "不會即時顯示在界面上",
"解决部分词汇翻译不准确的问题": "解決部分詞彙翻譯不準確的問題",
"等待多线程操作": "等待多線程操作",
"吸收title与作者以上的部分": "吸收標題與作者以上的部分",
"如果需要使用Slack Claude": "如果需要使用Slack Claude",
"一、论文概况": "一、論文概況",
"默认为Chinese": "默認為中文",
"图像生成所用到的提示文本": "圖像生成所用到的提示文本",
"向已打开的频道发送一条文本消息": "向已打開的頻道發送一條文本消息",
"如果某个子任务出错": "如果某個子任務出錯",
"chatglmft 没有 sys_prompt 接口": "chatglmft沒有sys_prompt接口",
"对比PDF编译是否成功": "對比PDF編譯是否成功",
"免费": "免費",
"请讲话": "請講話",
"安装ChatGLM的依赖": "安裝ChatGLM的依賴",
"对IPynb文件进行解析": "對IPynb文件進行解析",
"文件路径列表": "文件路徑列表",
"或者使用此插件继续上传更多文件": "或者使用此插件繼續上傳更多文件",
"随机负载均衡": "隨機負載均衡",
"!!!如果需要运行量化版本": "!!!如果需要運行量化版本",
"注意目前不能多人同时调用Claude接口": "注意目前不能多人同時調用Claude接口",
"文件读取完成": "文件讀取完成",
"用于灵活调整复杂功能的各种参数": "用於靈活調整複雜功能的各種參數",
"**函数功能**": "**函數功能**",
"先切换模型到openai或api2d": "先切換模型到openai或api2d",
"You are associated with a deactivated account. OpenAI以账户失效为由": "您的帳戶已停用。OpenAI以帳戶失效為由",
"你的回答必须简单明了": "您的回答必須簡單明了",
"是否丢弃掉 不是正文的内容": "是否丟棄掉 不是正文的內容",
"但请查收结果": "但請查收結果",
"Claude响应缓慢": "Claude響應緩慢",
"需Latex": "需Latex",
"Claude回复的片段": "Claude回復的片段",
"如果要使用ChatGLMFT": "如果要使用ChatGLMFT",
"它*必须*被包含在AVAIL_LLM_MODELS列表中": "它*必須*被包含在AVAIL_LLM_MODELS列表中",
"前面是中文逗号": "前面是中文逗號",
"需要预先pip install py7zr": "需要預先pip install py7zr",
"将前后断行符脱离": "將前後斷行符脫離",
"防止丢失最后一条消息": "防止丟失最後一條消息",
"初始化插件状态": "初始化插件狀態",
"以秒为单位": "以秒為單位",
"中文Latex项目全文润色": "中文Latex項目全文潤色",
"对整个Latex项目进行纠错": "對整個Latex項目進行校對",
"NEWBING_COOKIES未填写或有格式错误": "NEWBING_COOKIES未填寫或有格式錯誤",
"函数插件作者": "函數插件作者",
"结束": "結束",
"追加历史": "追加歷史",
"您需要首先调用构建知识库": "您需要首先調用構建知識庫",
"如果程序停顿5分钟以上": "如果程序停頓5分鐘以上",
"ChatGLMFT响应异常": "ChatGLMFT響應異常",
"根据当前的模型类别": "根據當前的模型類別",
"才能继续下面的步骤": "才能繼續下面的步驟",
"并将返回的频道ID保存在属性CHANNEL_ID中": "並將返回的頻道ID保存在屬性CHANNEL_ID中",
"请查收结果": "請查收結果",
"解决插件锁定时的界面显示问题": "解決插件鎖定時的界面顯示問題",
"待提取的知识库名称id": "待提取的知識庫名稱id",
"Claude响应异常": "Claude響應異常",
"当前代理可用性": "當前代理可用性",
"代理网络配置": "代理網絡配置",
"我将为您查找相关壁纸": "我將為您查找相關壁紙",
"没给定指令": "沒給定指令",
"音频内容是": "音頻內容是",
"用该压缩包+ConversationHistoryArchive进行反馈": "用該壓縮包+ConversationHistoryArchive進行反饋",
"总结音频": "總結音頻",
"等待用户的再次调用": "等待用戶的再次調用",
"永远给定None": "永遠給定None",
"论文概况": "論文概況",
"建议使用英文单词": "建議使用英文單詞",
"刷新Gradio前端界面": "刷新Gradio前端界面",
"列表递归接龙": "列表遞歸接龍",
"赋予插件状态": "賦予插件狀態",
"构建完成": "構建完成",
"避免多用户干扰": "避免多用戶干擾",
"当前工作路径为": "當前工作路徑為",
"用黑色标注转换区": "用黑色標注轉換區",
"压缩包": "壓縮包",
"刷新页面即可以退出KnowledgeBaseQA模式": "刷新頁面即可以退出KnowledgeBaseQA模式",
"拆分过长的Markdown文件": "拆分過長的Markdown文件",
"生成时间戳": "生成時間戳",
"尚未完成全部响应": "尚未完成全部響應",
"HotReload的装饰器函数": "HotReload的裝飾器函數",
"请务必用 pip install -r requirements.txt 指令安装依赖": "請務必用 pip install -r requirements.txt 指令安裝依賴",
"TGUI不支持函数插件的实现": "TGUI不支持函數插件的實現",
"音频文件名": "音頻文件名",
"找不到任何音频或视频文件": "找不到任何音頻或視頻文件",
"音频解析结果": "音頻解析結果",
"如果使用ChatGLM2微调模型": "如果使用ChatGLM2微調模型",
"限制的3/4时": "限制的3/4時",
"获取回复": "獲取回復",
"对话历史写入": "對話歷史寫入",
"记录删除注释后的文本": "記錄刪除註釋後的文本",
"整理结果为压缩包": "整理結果為壓縮包",
"注意事项": "注意事項",
"请耐心等待": "請耐心等待",
"在执行完成之后": "在執行完成之後",
"参数简单": "參數簡單",
"Arixv论文精细翻译": "Arixv論文精細翻譯",
"备份和下载": "備份和下載",
"当前报错的latex代码处于第": "當前報錯的latex代碼處於第",
"Markdown翻译": "Markdown翻譯",
"英文Latex项目全文纠错": "英文Latex項目全文校對",
"获取预处理函数": "獲取預處理函數",
"add gpt task 创建子线程请求gpt": "add gpt task 創建子線程請求gpt",
"一个包含所有切割音频片段文件路径的列表": "一個包含所有切割音頻片段文件路徑的列表",
"解析arxiv网址失败": "解析arxiv網址失敗",
"PDF文件所在的路径": "PDF文件所在路徑",
"取评分最高者返回": "取評分最高者返回",
"此插件处于开发阶段": "此插件處於開發階段",
"如果已经存在": "如果已經存在",
"或者不在环境变量PATH中": "或者不在環境變量PATH中",
"目前支持的格式": "目前支持的格式",
"将多文件tex工程融合为一个巨型tex": "將多文件tex工程融合為一個巨型tex",
"暂不提交": "暫不提交",
"调用函数": "調用函數",
"编译转化后的PDF": "編譯轉化後的PDF",
"将代码转为动画": "將代碼轉為動畫",
"本地Latex论文精细翻译": "本地Latex論文精細翻譯",
"删除或修改歧义文件": "刪除或修改歧義文件",
"其他操作系统表现未知": "其他操作系統表現未知",
"此插件Windows支持最佳": "此插件Windows支持最佳",
"构建知识库": "構建知識庫",
"每个切割音频片段的时长": "每個切割音頻片段的時長",
"用latex编译为PDF对修正处做高亮": "用latex編譯為PDF對修正處做高亮",
"行": "行",
"= 2 通过一些Latex模板中常见": "= 2 通過一些Latex模板中常見",
"如参考文献、脚注、图注等": "如參考文獻、腳註、圖註等",
"期望格式例如": "期望格式例如",
"翻译内容可靠性无保障": "翻譯內容可靠性無保障",
"请用一句话概括这些文件的整体功能": "請用一句話概括這些文件的整體功能",
"段音频完成了吗": "段音頻完成了嗎",
"填入azure openai api的密钥": "填入azure openai api的密鑰",
"文本碎片重组为完整的tex片段": "文本碎片重組為完整的tex片段",
"吸收在42行以內的begin-end組合": "吸收在42行以內的begin-end組合",
"屬性": "屬性",
"必須包含documentclass": "必須包含documentclass",
"等待GPT響應": "等待GPT響應",
"當前語言模型溫度設定": "當前語言模型溫度設定",
"模型選擇是": "選擇的模型為",
"reverse 操作必須放在最後": "reverse 操作必須放在最後",
"將子線程的gpt結果寫入chatbot": "將子線程的gpt結果寫入chatbot",
"默認為default": "默認為default",
"目前對機器學習類文獻轉化效果最好": "目前對機器學習類文獻轉化效果最好",
"主程序即將開始": "主程序即將開始",
"點擊“停止”鍵可終止程序": "點擊“停止”鍵可終止程序",
"正在處理": "正在處理",
"請立即終止程序": "請立即停止程序",
"將 chatglm 直接對齊到 chatglm2": "將 chatglm 直接對齊到 chatglm2",
"音頻助手": "音頻助手",
"正在構建知識庫": "正在構建知識庫",
"請向下翻": "請向下滾動頁面",
"後面是英文冒號": "後面是英文冒號",
"無法找到一個主Tex文件": "無法找到一個主Tex文件",
"使用中文总结音频“": "使用中文總結音頻",
"该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成": "該PDF由GPT-Academic開源項目調用大語言模型+Latex翻譯插件一鍵生成",
"开始生成动画": "開始生成動畫",
"完成情况": "完成情況",
"然后进行问答": "然後進行問答",
"为啥chatgpt会把cite里面的逗号换成中文逗号呀": "為啥chatgpt會把cite裡面的逗號換成中文逗號呀",
"暂时不支持历史消息": "暫時不支持歷史消息",
"项目Github地址 \\url{https": "項目Github地址 \\url{https",
"Newbing 请求失败": "Newbing 請求失敗",
"根据自然语言执行插件命令": "根據自然語言執行插件命令",
"迭代上一次的结果": "迭代上一次的結果",
"azure和api2d请求源": "azure和api2d請求源",
"格式如org-xxxxxxxxxxxxxxxxxxxxxxxx": "格式如org-xxxxxxxxxxxxxxxxxxxxxxxx",
"推荐http": "推薦http",
"将要匹配的模式": "將要匹配的模式",
"代理数据解析失败": "代理數據解析失敗",
"创建存储切割音频的文件夹": "創建存儲切割音頻的文件夾",
"用红色标注处保留区": "用紅色標注處保留區",
"至少一个线程任务Token溢出而失败": "至少一個線程任務Token溢出而失敗",
"获取Slack消息失败": "獲取Slack消息失敗",
"极少数情况下": "極少數情況下",
"辅助gpt生成代码": "輔助gpt生成代碼",
"生成图像": "生成圖像",
"最多收纳多少个网页的结果": "最多收納多少個網頁的結果",
"获取图片URL": "獲取圖片URL",
"正常状态": "正常狀態",
"编译原始PDF": "編譯原始PDF",
"SummarizeAudioAndVideo内容": "音視頻摘要內容",
"Latex文件融合完成": "Latex文件融合完成",
"获取线程锁": "獲取線程鎖",
"SlackClient类用于与Slack API进行交互": "SlackClient類用於與Slack API進行交互",
"检测到arxiv文档连接": "檢測到arxiv文檔連接",
"--读取参数": "--讀取參數",
"如果您是论文原作者": "如果您是論文原作者",
"5刀": "5美元",
"转化PDF编译是否成功": "轉換PDF編譯是否成功",
"生成带有段落标签的HTML代码": "生成帶有段落標籤的HTML代碼",
"目前不支持历史消息查询": "目前不支持歷史消息查詢",
"将文件添加到chatbot cookie中": "將文件添加到chatbot cookie中",
"多线程操作已经开始": "多線程操作已經開始",
"请求子进程": "請求子進程",
"将Unsplash API中的PUT_YOUR_QUERY_HERE替换成描述该事件的一个最重要的单词": "將Unsplash API中的PUT_YOUR_QUERY_HERE替換成描述該事件的一個最重要的單詞",
"不能加载Claude组件": "不能加載Claude組件",
"请仔细鉴别并以原文为准": "請仔細鑒別並以原文為準",
"否则结束循环": "否則結束循環",
"插件可读取“输入区”文本/路径作为参数": "插件可讀取“輸入區”文本/路徑作為參數",
"网络错误": "網絡錯誤",
"想象一个穿着者": "想像一個穿著者",
"避免遗忘导致死锁": "避免遺忘導致死鎖",
"保证括号正确": "保證括號正確",
"报错信息": "錯誤信息",
"提取视频中的音频": "提取視頻中的音頻",
"初始化音频采集线程": "初始化音頻採集線程",
"参考文献转Bib": "參考文獻轉Bib",
"阿里云实时语音识别 配置难度较高 仅建议高手用户使用 参考 https": "阿里云即時語音識別配置難度較高,僅建議高手用戶使用,參考 https",
"使用时": "使用時",
"处理个别特殊插件的锁定状态": "處理個別特殊插件的鎖定狀態",
"但通常不会出现在正文": "但通常不會出現在正文",
"此函数逐渐地搜索最长的条目进行剪辑": "此函數逐漸地搜索最長的條目進行剪輯",
"给出指令": "給出指令",
"读取音频文件": "讀取音頻文件",
"========================================= 插件主程序1 =====================================================": "========================================= 插件主程序1 =====================================================",
"带超时倒计时": "帶超時倒計時",
"禁止移除或修改此警告": "禁止移除或修改此警告",
"ChatGLMFT尚未加载": "ChatGLMFT尚未加載",
"双手离开鼠标键盘吧": "雙手離開鼠標鍵盤吧",
"缺少的依赖": "缺少的依賴",
"的单词": "的單詞",
"中读取数据构建知识库": "中讀取數據構建知識庫",
"函数热更新是指在不停止程序运行的情况下": "函數熱更新是指在不停止程序運行的情況下",
"建议低于1": "建議低於1",
"转化PDF编译已经成功": "轉換PDF編譯已經成功",
"出问题了": "出問題了",
"欢迎使用 MOSS 人工智能助手!": "歡迎使用 MOSS 人工智能助手!",
"正在精细切分latex文件": "正在精細切分LaTeX文件",
"”补上": "”補上",
"网络代理状态": "網路代理狀態",
"依赖检测通过": "依賴檢測通過",
"默认为default": "預設為default",
"Call MOSS fail 不能正常加载MOSS的参数": "呼叫MOSS失敗無法正常載入MOSS參數",
"音频助手": "音頻助手",
"次编译": "次編譯",
"其他错误": "其他錯誤",
"属性": "屬性",
"主程序即将开始": "主程式即將開始",
"Aliyun音频服务异常": "Aliyun音頻服務異常",
"response中会携带traceback报错信息": "response中會攜帶traceback錯誤信息",
"一些普通功能模块": "一些普通功能模組",
"和openai的连接容易断掉": "和openai的連線容易斷掉",
"请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期": "請檢查ALIYUN_TOKEN和ALIYUN_APPKEY是否過期",
"调用Claude时": "呼叫Claude時",
"插件锁定中": "插件鎖定中",
"将子线程的gpt结果写入chatbot": "將子線程的gpt結果寫入chatbot",
"当下一次用户提交时": "當下一次使用者提交時",
"先上传数据集": "先上傳資料集",
"请在此处追加更细致的矫错指令": "請在此處追加更細緻的矯錯指令",
"无法找到一个主Tex文件": "無法找到一個主Tex文件",
"gpt写的": "gpt寫的",
"预处理": "預處理",
"但大部分场合下并不需要修改": "但大部分場合下並不需要修改",
"正在构建知识库": "正在建構知識庫",
"开始请求": "開始請求",
"根据以上分析": "根據以上分析",
"需要特殊依赖": "需要特殊依賴",
"用于基础的对话功能": "用於基礎的對話功能",
"且没有代码段": "且沒有程式碼段",
"取决于": "取決於",
"openai的官方KEY需要伴隨組織編碼": "請填入組織編碼",
"等待newbing回覆的片段": "等待newbing回覆的片段",
"调用缓存": "呼叫快取",
"模型选择是": "模型選擇為",
"当前大语言模型": "當前大語言模型",
"然后转移到指定的另一个路径中": "然後轉移到指定的另一個路徑中",
"请向下翻": "請向下滾動",
"内容太长了都会触发token数量溢出的错误": "內容太長會觸發token數量溢出的錯誤",
"每一块": "每一塊",
"详情信息见requirements.txt": "詳細信息見requirements.txt",
"没有提供高级参数功能说明": "沒有提供高級參數功能說明",
"上传Latex项目": "上傳Latex項目",
"请立即终止程序": "請立即終止程式",
"解除插件锁定": "解除插件鎖定",
"意外Json结构": "意外Json結構",
"必须包含documentclass": "必須包含documentclass",
"10个文件为一组": "10個文件為一組",
"openai的官方KEY需要伴随组织编码": "openai的官方KEY需要伴隨組織編碼",
"重置文件的创建时间": "重置文件的創建時間",
"尽量是完整的一个section": "盡量是完整的一個section",
"报告如何远程获取": "報告如何遠程獲取",
"work_folder = Latex预处理": "work_folder = Latex預處理",
"吸收在42行以内的begin-end组合": "吸收在42行以內的begin-end組合",
"后面是英文冒号": "後面是英文冒號",
"使用latexdiff生成论文转化前后对比": "使用latexdiff生成論文轉化前後對比",
"首先你在英文语境下通读整篇论文": "首先你在英文語境下通讀整篇論文",
"为了防止大语言模型的意外谬误产生扩散影响": "為了防止大語言模型的意外謬誤產生擴散影響",
"发现已经存在翻译好的PDF文档": "發現已經存在翻譯好的PDF文檔",
"点击“停止”键可终止程序": "點擊“停止”鍵可終止程序",
"数学GenerateAnimations": "數學GenerateAnimations",
"随变按钮的回调函数注册": "隨變按鈕的回調函數註冊",
"history至少释放二分之一": "history至少釋放二分之一",
"当前语言模型温度设定": "當前語言模型溫度設定",
"等待GPT响应": "等待GPT響應",
"正在处理": "正在處理",
"多线程翻译开始": "多線程翻譯開始",
"reverse 操作必须放在最后": "reverse 操作必須放在最後",
"等待newbing回复的片段": "等待newbing回覆的片段",
"开始下载": "開始下載",
"将 chatglm 直接对齐到 chatglm2": "將 chatglm 直接對齊到 chatglm2",
"以上材料已经被写入": "以上材料已經被寫入",
"上传文件自动修正路径": "上傳文件自動修正路徑",
"然后请使用Markdown格式封装": "然後請使用Markdown格式封裝",
"目前对机器学习类文献转化效果最好": "目前對機器學習類文獻轉化效果最好",
"检查结果": "檢查結果",
"、地址": "地址",
"如.md": "如.md",
"使用Unsplash API": "使用Unsplash API",
"**输入参数说明**": "**輸入參數說明**",
"新版本可用": "新版本可用",
"找不到任何python文件": "找不到任何python文件",
"知乎": "知乎",
"日": "日",
"“喂狗”": "“喂狗”",
"第4步": "第4步",
"退出": "退出",
"使用 Unsplash API": "使用 Unsplash API"
}

View File

@ -6,17 +6,23 @@
pip install --upgrade pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
```
If China-specific network issues prevent the command above from executing:
If regional network issues prevent the command above from executing:
1. git clone the alibabacloud-nls-python-sdk project, or download the zip archive directly from its GitHub page.
   Command line: `git clone https://github.com/aliyun/alibabacloud-nls-python-sdk.git`
1. Enter the alibabacloud-nls-python-sdk directory and run `python setup.py install`

## 2. Configure the audio switch and the Aliyun APPKEY (config.py / config_private.py / environment variables)

- Register an Aliyun account
- Activate the Intelligent Speech Interaction service (it comes with a free usage quota)
- Obtain the token and appkey
- Aliyun will gradually be replaced by other, cheaper cloud services in the future

```
ENABLE_AUDIO = True
ALIYUN_TOKEN = "554a50fcd0bb476c8d07bb630e94d20c" # e.g. f37f30e0f9934c34a992f6f64f7eba4f
ALIYUN_APPKEY = "RoPlZrM88DnAFkZK" # e.g. RoPlZrM88DnAFkZK
ALIYUN_TOKEN = "554a50fcd0bb476c8d07bb630e94d20c" # this token has already expired
ALIYUN_APPKEY = "RoPlZrM88DnAFkZK" # this appkey has already expired
```

Reference: https://help.aliyun.com/document_detail/450255.html
@ -38,7 +44,9 @@ II To capture the computer's audio output (instead of your own voice), install `VB-A
- 3-2 Check "Listen to this device".
- 3-3 In the "playback through this device" dropdown, select your normal headphones or speakers.
III When switching between the two audio monitoring modes, the page must be refreshed to take effect
III `[Divert the playback of software such as Tencent Meeting through VoiceMeeter]` Building on step II: in the software (e.g. Tencent Meeting), open the sound menu, select "VoiceMeeter Input" as the speaker, and select your normal headset microphone as the microphone
IV When switching between the two audio monitoring modes, the page must be refreshed to take effect.

## 5. Click "Real-time audio collection" in the function plugin area, or other audio interaction features

View File

@ -14,7 +14,7 @@ def main():
    if not AUTHENTICATION: AUTHENTICATION = None
    from check_proxy import get_current_version
    from theme.theme import adjust_theme, advanced_css, theme_declaration
    from themes.theme import adjust_theme, advanced_css, theme_declaration
    initial_prompt = "Serve me as a writing and programming assistant."
    title_html = f"<h1 align=\"center\">GPT 学术优化 {get_current_version()}</h1>{theme_declaration}"
    description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic),感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""
@ -22,8 +22,10 @@ def main():
    # 问询记录, python 版本建议3.9+(越新越好)
    import logging, uuid
    os.makedirs("gpt_log", exist_ok=True)
    try: logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
    except: logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
    try: logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    except: logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
    # Disable logging output from the 'httpx' logger
    logging.getLogger("httpx").setLevel(logging.WARNING)
    print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
    # 一些普通功能模块
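The try/except pair exists because `logging.basicConfig` only accepts the `encoding` keyword from Python 3.9 onward; on older interpreters the first call raises `ValueError` for the unknown argument, and the fallback configures the same log file without it. The pattern in isolation:

```
import logging

# `encoding` was added to logging.basicConfig in Python 3.9; older
# interpreters reject the unknown keyword, hence the fallback call.
try:
    logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO,
                        encoding="utf-8",
                        format="%(asctime)s %(levelname)-8s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")
except ValueError:
    logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO,
                        format="%(asctime)s %(levelname)-8s %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")
```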

View File

@ -170,6 +170,29 @@ model_info = {
AVAIL_LLM_MODELS, LLM_MODEL = get_conf("AVAIL_LLM_MODELS", "LLM_MODEL")
AVAIL_LLM_MODELS = AVAIL_LLM_MODELS + [LLM_MODEL]

if "claude-1-100k" in AVAIL_LLM_MODELS or "claude-2" in AVAIL_LLM_MODELS:
    from .bridge_claude import predict_no_ui_long_connection as claude_noui
    from .bridge_claude import predict as claude_ui
    model_info.update({
        "claude-1-100k": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
    model_info.update({
        "claude-2": {
            "fn_with_ui": claude_ui,
            "fn_without_ui": claude_noui,
            "endpoint": None,
            "max_token": 8196,
            "tokenizer": tokenizer_gpt35,
            "token_cnt": get_token_num_gpt35,
        },
    })
if "jittorllms_rwkv" in AVAIL_LLM_MODELS:
    from .bridge_jittorllms_rwkv import predict_no_ui_long_connection as rwkv_noui
    from .bridge_jittorllms_rwkv import predict as rwkv_ui
@ -225,7 +248,6 @@ if "moss" in AVAIL_LLM_MODELS:
if "stack-claude" in AVAIL_LLM_MODELS:
    from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
    from .bridge_stackclaude import predict as claude_ui
    # claude
    model_info.update({
        "stack-claude": {
            "fn_with_ui": claude_ui,
@ -240,7 +262,6 @@ if "newbing-free" in AVAIL_LLM_MODELS:
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        # claude
        model_info.update({
            "newbing-free": {
                "fn_with_ui": newbingfree_ui,
@ -257,7 +278,6 @@ if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
        # claude
        model_info.update({
            "newbing": {
                "fn_with_ui": newbingfree_ui,
@ -274,7 +294,6 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
    try:
        from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
        from .bridge_chatglmft import predict as chatglmft_ui
        # claude
        model_info.update({
            "chatglmft": {
                "fn_with_ui": chatglmft_ui,
@ -287,7 +306,22 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
        })
    except:
        print(trimmed_format_exc())
if "internlm" in AVAIL_LLM_MODELS:
    try:
        from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
        from .bridge_internlm import predict as internlm_ui
        model_info.update({
            "internlm": {
                "fn_with_ui": internlm_ui,
                "fn_without_ui": internlm_noui,
                "endpoint": None,
                "max_token": 4096,
                "tokenizer": tokenizer_gpt35,
                "token_cnt": get_token_num_gpt35,
            }
        })
    except:
        print(trimmed_format_exc())

def LLM_CATCH_EXCEPTION(f):
    """

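Every registration above contributes a record with the same six keys, which is what keeps the rest of the project model-agnostic. A hedged sketch of how such a record is typically consumed (`dispatch_no_ui` is illustrative, not a function from the project):

```
def dispatch_no_ui(llm_model, inputs, llm_kwargs, history, sys_prompt):
    # Look up the uniform record registered above; a KeyError means the
    # model was never added to AVAIL_LLM_MODELS.
    entry = model_info[llm_model]
    if entry["token_cnt"](inputs) >= entry["max_token"]:
        raise RuntimeError("input exceeds the model's context window")
    # fn_without_ui is the thread-safe variant without UI refreshes.
    return entry["fn_without_ui"](inputs, llm_kwargs,
                                  history=history, sys_prompt=sys_prompt)
```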
View File

@ -37,15 +37,23 @@ class GetGLMHandle(Process):
        # 子进程执行
        # 第一次运行,加载参数
        retry = 0
        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')

        if LOCAL_MODEL_QUANT == "INT4":         # INT4
            _model_name_ = "THUDM/chatglm2-6b-int4"
        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
            _model_name_ = "THUDM/chatglm2-6b-int8"
        else:
            _model_name_ = "THUDM/chatglm2-6b"  # FP16

        while True:
            try:
                if self.chatglm_model is None:
                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
                    device, = get_conf('LOCAL_MODEL_DEVICE')
                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
                    if device=='cpu':
                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
                    else:
                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
                    self.chatglm_model = self.chatglm_model.eval()
                    break
                else:

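A hypothetical `config_private.py` override that selects the INT4 build (the option names come from the diff; any other value of `LOCAL_MODEL_QUANT` falls back to the FP16 checkpoint):

```
# config_private.py overrides (illustrative values)
LOCAL_MODEL_QUANT = "INT4"    # "INT4" -> chatglm2-6b-int4, "INT8" -> chatglm2-6b-int8, else FP16
LOCAL_MODEL_DEVICE = "cuda"   # "cpu" loads the .float() variant instead of .half().cuda()
```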
View File

@ -174,9 +174,10 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
            chunk = next(stream_response)
        except StopIteration:
            # 非OpenAI官方接口的出现这样的报错OpenAI和API2D不会走这里
            from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
            chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk.decode())}")
            yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk.decode()) # 刷新界面
            chunk_decoded = chunk.decode()
            error_msg = chunk_decoded
            chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
            yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
            return

        # print(chunk.decode()[6:])
@ -187,7 +188,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
        if chunk:
            try:
                chunk_decoded = chunk.decode()
                # 前者API2D的
                # 前者API2D的结束条件后者是OPENAI的结束条件
                if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
                    # 判定为数据流的结束gpt_replying_buffer也写完了
                    logging.info(f'[response] {gpt_replying_buffer}')
@ -200,41 +201,45 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                history[-1] = gpt_replying_buffer
                chatbot[-1] = (history[-2], history[-1])
                yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
            except Exception as e:
                traceback.print_exc()
                yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
                chunk = get_full_error(chunk, stream_response)
                chunk_decoded = chunk.decode()
                error_msg = chunk_decoded
                openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
                if "reduce the length" in error_msg:
                    if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入history[-2] 是本次输入, history[-1] 是本次输出
                    history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                                           max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
                    # history = [] # 清除历史
                elif "does not exist" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
                elif "Incorrect API key" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website)
                elif "exceeded your current quota" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website)
                elif "account is not active" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website)
                elif "associated with a deactivated account" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website)
                elif "bad forward key" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.")
                elif "Not enough point" in error_msg:
                    chatbot[-1] = (chatbot[-1][0], "[Local Message] Not enough point. API2D账户点数不足.")
                else:
                    from toolbox import regular_txt_to_markdown
                    tb_str = '```\n' + trimmed_format_exc() + '```'
                    chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
                print(error_msg)
                return

def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
    from .bridge_all import model_info
    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
    if "reduce the length" in error_msg:
        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入history[-2] 是本次输入, history[-1] 是本次输出
        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'],
                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
        # history = [] # 清除历史
    elif "does not exist" in error_msg:
        chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
    elif "Incorrect API key" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website)
    elif "exceeded your current quota" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website)
    elif "account is not active" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website)
    elif "associated with a deactivated account" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website)
    elif "bad forward key" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.")
    elif "Not enough point" in error_msg:
        chatbot[-1] = (chatbot[-1][0], "[Local Message] Not enough point. API2D账户点数不足.")
    else:
        from toolbox import regular_txt_to_markdown
        tb_str = '```\n' + trimmed_format_exc() + '```'
        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
    return chatbot, history

def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    整合所有信息选择LLM模型生成http请求为发送请求做准备

View File

@ -0,0 +1,231 @@
# 借鉴了 https://github.com/GaiZhenbiao/ChuanhuChatGPT 项目
"""
该文件中主要包含2个函数
不具备多线程能力的函数:
1. predict: 正常对话时使用,具备完备的交互功能,不可多线程
具备多线程调用能力的函数
2. predict_no_ui_long_connection在实验过程中发现调用predict_no_ui处理长文档时和openai的连接容易断掉这个函数用stream的方式解决这个问题同样支持多线程
"""
import os
import json
import time
import gradio as gr
import logging
import traceback
import requests
import importlib

# config_private.py放自己的秘密如API和代理网址
# 读取时首先看是否存在私密的config_private配置文件不受git管控如果有则覆盖原config文件
from toolbox import get_conf, update_ui, trimmed_format_exc, ProxyNetworkActivate
proxies, TIMEOUT_SECONDS, MAX_RETRY, ANTHROPIC_API_KEY = \
    get_conf('proxies', 'TIMEOUT_SECONDS', 'MAX_RETRY', 'ANTHROPIC_API_KEY')

timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                  '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'

def get_full_error(chunk, stream_response):
    """
    获取完整的从Openai返回的报错
    """
    while True:
        try:
            chunk += next(stream_response)
        except:
            break
    return chunk

def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
    """
    发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
    inputs
        是本次问询的输入
    sys_prompt:
        系统静默prompt
    llm_kwargs
        chatGPT的内部调优参数
    history
        是之前的对话列表
    observe_window = None
        用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
    """
    from anthropic import Anthropic
    watch_dog_patience = 5 # 看门狗的耐心, 设置5秒即可
    prompt = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    if len(ANTHROPIC_API_KEY) == 0:
        raise RuntimeError("没有设置ANTHROPIC_API_KEY选项")

    while True:
        try:
            # make a POST request to the API endpoint, stream=False
            from .bridge_all import model_info
            anthropic = Anthropic(api_key=ANTHROPIC_API_KEY)
            # endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            # with ProxyNetworkActivate()
            stream = anthropic.completions.create(
                prompt=prompt,
                max_tokens_to_sample=4096,  # The maximum number of tokens to generate before stopping.
                model=llm_kwargs['llm_model'],
                stream=True,
                temperature = llm_kwargs['temperature']
            )
            break
        except Exception as e:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')

    result = ''
    try:
        for completion in stream:
            result += completion.completion
            if not console_slience: print(completion.completion, end='')
            if observe_window is not None:
                # 观测窗,把已经获取的数据显示出去
                if len(observe_window) >= 1: observe_window[0] += completion.completion
                # 看门狗,如果超过期限没有喂狗,则终止
                if len(observe_window) >= 2:
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
    except Exception as e:
        traceback.print_exc()
    return result

def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    发送至chatGPT流式获取输出。
    用于基础的对话功能。
    inputs 是本次问询的输入
    top_p, temperature是chatGPT的内部调优参数
    history 是之前的对话列表注意无论是inputs还是history内容太长了都会触发token数量溢出的错误
    chatbot 为WebUI中显示的对话列表修改它然后yeild出去可以直接修改对话界面内容
    additional_fn代表点击的哪个按钮按钮见functional.py
    """
    from anthropic import Anthropic
    if len(ANTHROPIC_API_KEY) == 0:
        chatbot.append((inputs, "没有设置ANTHROPIC_API_KEY"))
        yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
        return

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # 热更新prompt
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    raw_input = inputs
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面

    try:
        prompt = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
        return

    history.append(inputs); history.append("")
    retry = 0
    while True:
        try:
            # make a POST request to the API endpoint, stream=True
            from .bridge_all import model_info
            anthropic = Anthropic(api_key=ANTHROPIC_API_KEY)
            # endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            # with ProxyNetworkActivate()
            stream = anthropic.completions.create(
                prompt=prompt,
                max_tokens_to_sample=4096,  # The maximum number of tokens to generate before stopping.
                model=llm_kwargs['llm_model'],
                stream=True,
                temperature = llm_kwargs['temperature']
            )
            break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
            retry_msg = f",正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
            if retry > MAX_RETRY: raise TimeoutError

    gpt_replying_buffer = ""
    for completion in stream:
        try:
            gpt_replying_buffer = gpt_replying_buffer + completion.completion
            history[-1] = gpt_replying_buffer
            chatbot[-1] = (history[-2], history[-1])
            yield from update_ui(chatbot=chatbot, history=history, msg='正常') # 刷新界面
        except Exception as e:
            from toolbox import regular_txt_to_markdown
            tb_str = '```\n' + trimmed_format_exc() + '```'
            chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str}")
            yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + tb_str) # 刷新界面
            return

# https://github.com/jtsang4/claude-to-chatgpt/blob/main/claude_to_chatgpt/adapter.py
def convert_messages_to_prompt(messages):
    prompt = ""
    role_map = {
        "system": "Human",
        "user": "Human",
        "assistant": "Assistant",
    }
    for message in messages:
        role = message["role"]
        content = message["content"]
        transformed_role = role_map[role]
        prompt += f"\n\n{transformed_role.capitalize()}: {content}"
    prompt += "\n\nAssistant: "
    return prompt

def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    整合所有信息选择LLM模型生成http请求为发送请求做准备
    """
    from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
    conversation_cnt = len(history) // 2

    messages = [{"role": "system", "content": system_prompt}]
    if conversation_cnt:
        for index in range(0, 2*conversation_cnt, 2):
            what_i_have_asked = {}
            what_i_have_asked["role"] = "user"
            what_i_have_asked["content"] = history[index]
            what_gpt_answer = {}
            what_gpt_answer["role"] = "assistant"
            what_gpt_answer["content"] = history[index+1]
            if what_i_have_asked["content"] != "":
                if what_gpt_answer["content"] == "": continue
                if what_gpt_answer["content"] == timeout_bot_msg: continue
                messages.append(what_i_have_asked)
                messages.append(what_gpt_answer)
            else:
                messages[-1]['content'] = what_gpt_answer['content']

    what_i_ask_now = {}
    what_i_ask_now["role"] = "user"
    what_i_ask_now["content"] = inputs
    messages.append(what_i_ask_now)
    prompt = convert_messages_to_prompt(messages)
    return prompt
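A hypothetical driver for the non-UI entry point above, as a plugin running in a worker thread might call it (the argument values are illustrative, not from the project):

```
# Illustrative call; assumes ANTHROPIC_API_KEY is configured in config.py.
llm_kwargs = {"llm_model": "claude-2", "temperature": 0.7}
reply = predict_no_ui_long_connection(
    inputs="Summarize this abstract in two sentences.",
    llm_kwargs=llm_kwargs,
    history=[],     # alternating [question, answer, ...] pairs from earlier turns
    sys_prompt="You are an academic writing assistant.",
)
print(reply)
```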

View File

@ -0,0 +1,315 @@
from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf, Singleton
from multiprocessing import Process, Pipe

model_name = "InternLM"
cmd_to_install = "`pip install ???`"
load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"

def try_to_import_special_deps():
    import sentencepiece

user_prompt = "<|User|>:{user}<eoh>\n"
robot_prompt = "<|Bot|>:{robot}<eoa>\n"
cur_query_prompt = "<|User|>:{user}<eoh>\n<|Bot|>:"

def combine_history(prompt, hist):
    messages = hist
    total_prompt = ""
    for message in messages:
        cur_content = message
        cur_prompt = user_prompt.replace("{user}", cur_content[0])
        total_prompt += cur_prompt
        cur_prompt = robot_prompt.replace("{robot}", cur_content[1])
        total_prompt += cur_prompt
    total_prompt = total_prompt + cur_query_prompt.replace("{user}", prompt)
    return total_prompt
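# A worked example of the prompt format assembled above (hypothetical
# inputs, shown for illustration only):
#   combine_history("你好", [("What is 2+2?", "4")])
#   -> "<|User|>:What is 2+2?<eoh>\n<|Bot|>:4<eoa>\n<|User|>:你好<eoh>\n<|Bot|>:"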
@Singleton
class GetInternlmHandle(Process):
    def __init__(self):
        # ⭐ runs in the main process
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self._model = None
        self._tokenizer = None
        self.info = ""
        self.success = True
        self.check_dependency()
        self.start()
        self.threadLock = threading.Lock()

    def ready(self):
        # ⭐ runs in the main process
        return self._model is not None

    def load_model_and_tokenizer(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        device, = get_conf('LOCAL_MODEL_DEVICE')
        if self._model is None:
            tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
            if device=='cpu':
                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
            else:
                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
            model = model.eval()
        return model, tokenizer
    def llm_stream_generator(self, **kwargs):
        import torch
        import logging
        import copy
        import warnings
        import torch.nn as nn
        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        def adaptor():
            model = self._model
            tokenizer = self._tokenizer
            prompt = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            real_prompt = combine_history(prompt, history)
            return model, tokenizer, real_prompt, max_length, top_p, temperature

        model, tokenizer, prompt, max_length, top_p, temperature = adaptor()
        prefix_allowed_tokens_fn = None
        logits_processor = None
        stopping_criteria = None
        additional_eos_token_id = 103028
        generation_config = None

        # 🏃‍♂️🏃‍♂️🏃‍♂️ adapted from https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25
        inputs = tokenizer([prompt], padding=True, return_tensors="pt")
        input_length = len(inputs["input_ids"][0])
        for k, v in inputs.items():
            inputs[k] = v.cuda()
        input_ids = inputs["input_ids"]
        batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
        if generation_config is None:
            generation_config = model.generation_config
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]
        if additional_eos_token_id is not None:
            eos_token_id.append(additional_eos_token_id)
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logging.warn(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
                    UserWarning,
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "input_ids"
            logging.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = model._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = model._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = model._get_logits_warper(generation_config)

        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
        scores = None
        while True:
            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = model(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = nn.functional.softmax(next_token_scores, dim=-1)
            if generation_config.do_sample:
                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = torch.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
            model_kwargs = model._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=False
            )
            unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())

            output_token_ids = input_ids[0].cpu().tolist()
            output_token_ids = output_token_ids[input_length:]
            for each_eos_token_id in eos_token_id:
                if output_token_ids[-1] == each_eos_token_id:
                    output_token_ids = output_token_ids[:-1]
            response = tokenizer.decode(output_token_ids)

            yield response
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                return
    def check_dependency(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        try:
            try_to_import_special_deps()
            self.info = "依赖检测通过"
            self.success = True
        except:
            self.info = f"缺少{model_name}的依赖,如果要使用{model_name}除了基础的pip依赖以外您还需要运行{cmd_to_install}安装{model_name}的依赖。"
            self.success = False

    def run(self):
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # first run: load the model weights
        try:
            self._model, self._tokenizer = self.load_model_and_tokenizer()
        except:
            from toolbox import trimmed_format_exc
            self.child.send(f'[Local Message] 不能正常加载{model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            raise RuntimeError(f"不能正常加载{model_name}的参数!")

        while True:
            # wait for the next task
            kwargs = self.child.recv()
            # a request has arrived; start handling it
            try:
                for response_full in self.llm_stream_generator(**kwargs):
                    self.child.send(response_full)
            except:
                from toolbox import trimmed_format_exc
                self.child.send(f'[Local Message] 调用{model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # the request is done; signal completion and loop back to waiting
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        # ⭐ runs in the main process
        self.threadLock.acquire()
        self.parent.send(kwargs)
        while True:
            res = self.parent.recv()
            if res != '[Finish]':
                yield res
            else:
                break
        self.threadLock.release()
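The handle above follows a general pattern: keep the heavyweight model in a daemon subprocess and stream partial results back over a `Pipe`, with `'[Finish]'` as an end-of-reply sentinel. A minimal standalone sketch of the same idea (class and names here are illustrative, not from the diff):

```python
# Minimal sketch of the Process + Pipe streaming pattern used by GetInternlmHandle.
# The prefix generator stands in for the model; '[Finish]' marks end of reply.
from multiprocessing import Process, Pipe

class StreamWorker(Process):
    def __init__(self):
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self.start()

    def run(self):  # child process: serve requests forever
        while True:
            query = self.child.recv()
            for chunk in (query[:i] for i in range(1, len(query) + 1)):
                self.child.send(chunk)       # stream partial results
            self.child.send('[Finish]')      # sentinel: reply complete

    def stream_chat(self, query):  # main process: yield until the sentinel
        self.parent.send(query)
        while (res := self.parent.recv()) != '[Finish]':
            yield res

if __name__ == '__main__':
    worker = StreamWorker()
    for partial in worker.stream_chat("hello"):
        print(partial)
```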
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic
# ------------------------------------------------------------------------------------------------------------------------
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐ multi-threaded entry point
    See request_llm/bridge_all.py for the documentation of this function.
    """
    _llm_handle = GetInternlmHandle()
    if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
    if not _llm_handle.success:
        error = _llm_handle.info
        _llm_handle = None
        raise RuntimeError(error)

    # this local model exposes no sys_prompt interface, so the system prompt is folded into history
    history_feedin = []
    history_feedin.append(["What can I do?", sys_prompt])
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]])

    watch_dog_patience = 5  # watchdog patience: 5 seconds is enough
    response = ""
    for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = response
        if len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return response
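A hedged caller-side sketch of the `observe_window` protocol (slot 0 carries the newest partial response out; slot 1 is a heartbeat timestamp the caller must keep refreshing, or the loop above raises):

```python
# Hypothetical illustration, not part of the diff.
import threading, time

observe_window = ["", time.time()]

def heartbeat():
    while True:
        observe_window[1] = time.time()   # keep the watchdog fed
        time.sleep(1)

threading.Thread(target=heartbeat, daemon=True).start()
# predict_no_ui_long_connection(inputs, llm_kwargs, observe_window=observe_window)
# would now stream into observe_window[0] and abort only if the heartbeat stops.
```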
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream=True, additional_fn=None):
    """
    ⭐ single-threaded entry point
    See request_llm/bridge_all.py for the documentation of this function.
    """
    chatbot.append((inputs, ""))

    _llm_handle = GetInternlmHandle()
    chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info)
    yield from update_ui(chatbot=chatbot, history=[])
    if not _llm_handle.success:
        _llm_handle = None
        return

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # hot-reload the prompt definitions
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # apply the pre-processing function, if any
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    # assemble the history
    history_feedin = []
    history_feedin.append(["What can I do?", system_prompt])
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]])

    # start receiving the model's streamed reply
    response = f"[Local Message]: 等待{model_name}响应中 ..."
    for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # finalize the output
    if response == f"[Local Message]: 等待{model_name}响应中 ...":
        response = f"[Local Message]: {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)

View File

@@ -447,6 +447,15 @@ class _ChatHub:
        """
        Ask a question to the bot
        """
        req_header = HEADERS
        if self.cookies is not None:
            ws_cookies = []
            for cookie in self.cookies:
                ws_cookies.append(f"{cookie['name']}={cookie['value']}")
            req_header.update({
                'Cookie': ';'.join(ws_cookies),
            })

        timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(timeout=timeout)
@@ -455,7 +464,7 @@ class _ChatHub:
            # Check if websocket is closed
            self.wss = await self.session.ws_connect(
                wss_link,
                headers=HEADERS,
                headers=req_header,
                ssl=ssl_context,
                proxy=self.proxy,
                autoping=False,
@@ -510,7 +519,11 @@ class _ChatHub:
        resp_txt_no_link = ""
        while not final:
            msg = await self.wss.receive()
            objects = msg.data.split(DELIMITER)
            try:
                objects = msg.data.split(DELIMITER)
            except:
                continue
            for obj in objects:
                if obj is None or not obj:
                    continue
@@ -1109,4 +1122,4 @@ class ImageQuery(Query):
if __name__ == "__main__":
    main()
    main()
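The new try/except exists because `aiohttp`'s `receive()` can deliver CLOSE or PING frames whose `.data` is an int or `None`, on which `.split()` raises (the traceback this fixes is the `msg.data.split(DELIMITER)` exception from the merged PR). An explicit, hypothetical variant of the same guard:

```python
# Hypothetical helper, not part of the diff: skip non-text frames up front
# instead of swallowing the exception with a bare except.
import aiohttp

def split_ws_frame(msg, delimiter):
    """Return the delimited records of a TEXT frame, else an empty list."""
    if msg.type != aiohttp.WSMsgType.TEXT or not isinstance(msg.data, str):
        return []
    return msg.data.split(delimiter)
```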

View File

@@ -10,10 +10,12 @@ def validate_path():
validate_path()  # validate path so you can run from base directory

if __name__ == "__main__":
    from request_llm.bridge_newbingfree import predict_no_ui_long_connection
    # from request_llm.bridge_newbingfree import predict_no_ui_long_connection
    # from request_llm.bridge_moss import predict_no_ui_long_connection
    # from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
    # from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection
    # from request_llm.bridge_claude import predict_no_ui_long_connection
    from request_llm.bridge_internlm import predict_no_ui_long_connection

    llm_kwargs = {
        'max_length': 512,
@@ -21,58 +23,8 @@ if __name__ == "__main__":
        'temperature': 1,
    }

    result = predict_no_ui_long_connection(inputs="你好",
                                           llm_kwargs=llm_kwargs,
                                           history=[],
                                           sys_prompt="")
    result = predict_no_ui_long_connection(inputs="请问什么是质子?",
                                           llm_kwargs=llm_kwargs,
                                           history=["你好", "我好!"],
                                           sys_prompt="")
    print('final result:', result)

    result = predict_no_ui_long_connection(inputs="what is a hero?",
                                           llm_kwargs=llm_kwargs,
                                           history=["hello world"],
                                           sys_prompt="")
    print('final result:', result)

    result = predict_no_ui_long_connection(inputs="如何理解传奇?",
                                           llm_kwargs=llm_kwargs,
                                           history=[],
                                           sys_prompt="")
    print('final result:', result)
    # # print(result)
    # from multiprocessing import Process, Pipe
    # class GetGLMHandle(Process):
    #     def __init__(self):
    #         super().__init__(daemon=True)
    #         pass
    #     def run(self):
    #         # runs in the child process
    #         # first run: load the model weights
    #         def validate_path():
    #             import os, sys
    #             dir_name = os.path.dirname(__file__)
    #             root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
    #             os.chdir(root_dir_assume + '/request_llm/jittorllms')
    #             sys.path.append(root_dir_assume + '/request_llm/jittorllms')
    #         validate_path()  # validate path so you can run from base directory
    #         jittorllms_model = None
    #         import types
    #         try:
    #             if jittorllms_model is None:
    #                 from models import get_model
    #                 # available_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
    #                 args_dict = {'model': 'chatrwkv'}
    #                 print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
    #                 jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
    #                 print('done get model')
    #         except:
    #             # self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
    #             raise RuntimeError("不能正常加载jittorllms的参数")
    # x = GetGLMHandle()
    # x.start()
    # input()

View File

@@ -9,6 +9,7 @@ prompt_toolkit
latex2mathml
python-docx
mdtex2html
anthropic
colorama
Markdown
pygments
@@ -17,3 +18,4 @@ openai
numpy
arxiv
rich
pypdf2==2.12.1

View File

@@ -9,9 +9,8 @@ def adjust_theme():
    set_theme = gr.themes.Default(
        primary_hue=gr.themes.utils.colors.orange,
        neutral_hue=gr.themes.utils.colors.gray,
        font=["sans-serif", "Microsoft YaHei", "ui-sans-serif", "system-ui",
              "sans-serif", gr.themes.utils.fonts.GoogleFont("Source Sans Pro")],
        font_mono=["ui-monospace", "Consolas", "monospace", gr.themes.utils.fonts.GoogleFont("IBM Plex Mono")])
        font=["sans-serif", "Microsoft YaHei", "ui-sans-serif", "system-ui"],
        font_mono=["ui-monospace", "Consolas", "monospace"])
    set_theme.set(
        # Colors
        input_background_fill_dark="*neutral_800",
@@ -61,7 +60,7 @@ def adjust_theme():
    if LAYOUT=="TOP-DOWN":
        js = ""
    else:
        with open('theme/common.js', 'r', encoding='utf8') as f:
        with open('themes/common.js', 'r', encoding='utf8') as f:
            js = f"<script>{f.read()}</script>"

    # add a cute Live2D mascot
@@ -83,5 +82,5 @@ def adjust_theme():
    print('gradio版本较旧, 不能自定义字体和颜色')
    return set_theme

with open("theme/default.css", "r", encoding="utf-8") as f:
with open("themes/default.css", "r", encoding="utf-8") as f:
    advanced_css = f.read()

View File

@@ -77,7 +77,7 @@ def adjust_theme():
    if LAYOUT=="TOP-DOWN":
        js = ""
    else:
        with open('theme/common.js', 'r', encoding='utf8') as f:
        with open('themes/common.js', 'r', encoding='utf8') as f:
            js = f"<script>{f.read()}</script>"

    # add a cute Live2D mascot
@@ -100,5 +100,5 @@ def adjust_theme():
    return set_theme

with open("theme/green.css", "r", encoding="utf-8") as f:
with open("themes/green.css", "r", encoding="utf-8") as f:
    advanced_css = f.read()

View File

@@ -538,7 +538,11 @@ def load_chat_cookies():
    return {'api_key': API_KEY, 'llm_model': LLM_MODEL}

def is_openai_api_key(key):
    API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
    CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
    if len(CUSTOM_API_KEY_PATTERN) != 0:
        API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
    else:
        API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
    return bool(API_MATCH_ORIGINAL)

def is_azure_api_key(key):
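The compare view does not show the config side of the new `CUSTOM_API_KEY_PATTERN` option; a hedged example of how it would be set (the pattern value below is illustrative, not from the diff):

```python
# In config.py / config_private.py (hypothetical value): accept keys issued by
# a self-hosted proxy whose keys look like "mykey-<32 hex chars>".
CUSTOM_API_KEY_PATTERN = r"mykey-[0-9a-f]{32}$"
# Leave it as "" to keep the default OpenAI pattern sk-[a-zA-Z0-9]{48}$.
```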
@@ -594,7 +598,7 @@ def select_api_key(keys, llm_model):
        if is_azure_api_key(k): avail_key_list.append(k)

    if len(avail_key_list) == 0:
        raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azureapi2d请求源")
        raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azure,claude,api2d请求源)")

    api_key = random.choice(avail_key_list)  # random load balancing
    return api_key

@@ -670,13 +674,14 @@ def read_single_conf_with_lru_cache(arg):
    # when reading API_KEY, remind the user in case they forgot to edit config
    if arg == 'API_KEY':
        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和API2D的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,api2d-key3\"")
        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
        print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s)也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")

        if is_any_api_key(r):
            print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
        else:
            print亮红( "[API_KEY] 正确的 API_KEY 'sk'开头的51位密钥OpenAI或者 'fk'开头的41位密钥请在config文件中修改API密钥之后再运行。")
            print亮红( "[API_KEY] 您的 API_KEY 不满足任何一种已知的密钥格式请在config文件中修改API密钥之后再运行。")

    if arg == 'proxies':
        if not read_single_conf_with_lru_cache('USE_PROXY'): r = None  # check USE_PROXY so that proxies cannot take effect on its own
        if r is None:
            print亮红('[PROXY] 网络代理状态未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议检查USE_PROXY选项是否修改。')
        else:
@@ -685,6 +690,7 @@ def read_single_conf_with_lru_cache(arg):
    return r

@lru_cache(maxsize=128)
def get_conf(*args):
    # it is recommended to keep secrets such as API keys and proxy URLs in a copy named config_private.py, so they are not accidentally pushed to GitHub
    res = []

@@ -883,4 +889,16 @@ def objload(file='objdump.tmp'):
        return
    with open(file, 'rb') as f:
        return pickle.load(f)

def Singleton(cls):
    """
    A singleton decorator.
    """
    _instance = {}

    def _singleton(*args, **kargs):
        if cls not in _instance:
            _instance[cls] = cls(*args, **kargs)
        return _instance[cls]

    return _singleton
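Usage note on the new `Singleton` decorator: `_instance` is keyed by the class, so every call site shares one object; this is what lets `GetInternlmHandle()` return the same loaded-model process each time. For example:

```python
# Usage sketch: both "constructions" return the same instance.
@Singleton
class ModelHandle:
    def __init__(self):
        print("loading model once ...")

a = ModelHandle()   # prints: loading model once ...
b = ModelHandle()   # no print; returns the cached instance
assert a is b
```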

View File

@@ -1,5 +1,5 @@
{
    "version": 3.45,
    "version": 3.47,
    "show_feature": true,
    "new_feature": "支持加载自定义的ChatGLM2微调模型 <-> [改善UI] 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件"
    "new_feature": "优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件(自动断句,脱手对话) <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持"
}