Compare commits

...

24 Commits

Author SHA1 Message Date
fb22f716ff 3d interp 2023-07-25 10:09:50 +08:00
a8093b9dd8 多线程约束越大执行时间 2023-07-22 03:05:04 +08:00
4f55dfdc0e easy 2023-07-22 02:25:30 +08:00
505b10965f CodeInterpreter 2023-07-22 01:48:24 +08:00
a393edfaa4 ALLOW CUSTOM API KEY PATTERN 2023-07-21 22:49:07 +08:00
dd7a01cda5 Merge pull request #976 from fenglui/master
fix msg.data.split(DELIMITER) exception when msg.data is int
2023-07-21 17:02:29 +08:00
00a3b91f95 fix msg.data.split(DELIMITER) exception when msg.data is int 2023-07-21 03:51:33 +08:00
61ba544282 add latex test samples 2023-07-20 19:49:23 +08:00
b5b8c123e4 latex plugin stability improvement 2023-07-20 19:39:22 +08:00
d9ceba959f expand range after failure 2023-07-20 18:39:02 +08:00
6b5b040701 remove pdf merge 2023-07-20 18:29:06 +08:00
4f4c09a5f3 增强Latex修复能力 2023-07-20 18:08:22 +08:00
067bc97cce Merge branch 'interface-interlm' of https://github.com/binary-husky/chatgpt_academic into interface-interlm 2023-07-20 12:46:52 +08:00
7368580cd6 concat pdf after translation 2023-07-20 12:46:48 +08:00
df90db210c Merge branch 'master' into interface-interlm 2023-07-20 11:40:45 +08:00
0927ed20a2 edit default configuration 2023-07-20 11:39:35 +08:00
73b22f85be compat third party gpt error handle 2023-07-20 11:09:22 +08:00
b8d77557b0 Update README.md 2023-07-20 10:12:42 +08:00
99b8fce8f3 Merge pull request #965 from QQisQQ/patch-2
解决new bing 报错200 (fix new bing error code 200 )
2023-07-19 10:15:15 +08:00
16364f1b2d Merge pull request #966 from doujiang-zheng/master
Add timestamp for chat_secrets.log and disable the verbose httpx log.
2023-07-19 10:14:36 +08:00
3b88e00cfb Add timestamp for chat_secrets.log and disable the verbose httpx log. 2023-07-19 09:43:59 +08:00
0c8c539e9b 解决new bing 报错200 (fix new bing error code 200 )
modify from 16e00af9d5

works for my issue:
```
Traceback (most recent call last):
  File "./request_llm/bridge_newbingfree.py", line 152, in run
    asyncio.run(self.async_run())
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/base_events.py", line 653, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "./request_llm/bridge_newbingfree.py", line 98, in async_run
    async for final, response in self.newbing_model.ask_stream(
  File "./request_llm/edge_gpt_free.py", line 676, in ask_stream
    async for response in self.chat_hub.ask_stream(
  File "./request_llm/edge_gpt_free.py", line 456, in ask_stream
    self.wss = await self.session.ws_connect(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/envs/py311/lib/python3.11/site-packages/aiohttp/client.py", line 795, in _ws_connect
    raise WSServerHandshakeError(
aiohttp.client_exceptions.WSServerHandshakeError: 200, message='Invalid response status', url=URL('wss://sydney.bing.com/sydney/ChatHub')
```
2023-07-19 04:39:15 +08:00
fd549fb986 merge success 2023-07-18 19:51:13 +08:00
babb775cfb interface with interlm 2023-07-18 16:33:34 +08:00
20 changed files with 1450 additions and 640 deletions

1
.gitignore vendored
View File

@ -151,3 +151,4 @@ multi-language
request_llm/moss
media
flagged
objdump.tmp

View File

@ -44,7 +44,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
[谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
互联网信息聚合+GPT | [函数插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时
⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [函数插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机
公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
多线程函数插件支持 | 支持多线调用chatgpt一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序

View File

@ -32,9 +32,9 @@ else:
# ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
# 重新URL重新定向实现更换API_URL的作用常规情况下不要修改!! 高危设置!通过修改此设置您将把您的API-KEY和对话隐私完全暴露给您设定的中间人
# 格式 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
# 例如 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":"https://reverse-proxy-url/v1/chat/completions"}
# 重新URL重新定向实现更换API_URL的作用高危设置! 常规情况下不要修改! 通过修改此设置您将把您的API-KEY和对话隐私完全暴露给您设定的中间人
# 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
# 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
API_URL_REDIRECT = {}
@ -71,7 +71,7 @@ MAX_RETRY = 2
# 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
# ChatGLM(2) Finetune Model Path 如果使用ChatGLM2微调模型需要把"chatglmft"加入AVAIL_LLM_MODELS中
@ -137,3 +137,7 @@ ALIYUN_APPKEY="" # 例如 RoPlZrM88DnAFkZK
# Claude API KEY
ANTHROPIC_API_KEY = ""
# 自定义API KEY格式
CUSTOM_API_KEY_PATTERN = ""

View File

@ -432,18 +432,18 @@ def get_crazy_functions():
except:
print('Load function plugin failed')
# try:
# from crazy_functions.虚空终端 import 终端
# function_plugins.update({
# "超级终端": {
# "Color": "stop",
# "AsButton": False,
# # "AdvancedArgs": True,
# # "ArgsReminder": "",
# "Function": HotReload(终端)
# }
# })
# except:
# print('Load function plugin failed')
try:
from crazy_functions.虚空终端CodeInterpreter import 虚空终端CodeInterpreter
function_plugins.update({
"虚空终端CodeInterpreter": {
"Color": "stop",
"AsButton": True,
# "AdvancedArgs": True,
# "ArgsReminder": "",
"Function": HotReload(虚空终端CodeInterpreter)
}
})
except:
print('Load function plugin failed')
return function_plugins

View File

@ -157,7 +157,7 @@ def Latex英文纠错加PDF对比(txt, llm_kwargs, plugin_kwargs, chatbot, histo
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])
@ -234,7 +234,7 @@ def Latex翻译中文并重新编译PDF(txt, llm_kwargs, plugin_kwargs, chatbot,
try:
import glob, os, time, subprocess
subprocess.Popen(['pdflatex', '-version'])
from .latex_utils import Latex精细分解与转化, 编译Latex
from .latex_fns.latex_actions import Latex精细分解与转化, 编译Latex
except Exception as e:
chatbot.append([ f"解析项目: {txt}",
f"尝试执行Latex指令失败。Latex没有安装, 或者不在环境变量PATH中。安装方法https://tug.org/texlive/。报错信息\n\n```\n\n{trimmed_format_exc()}\n\n```\n\n"])

View File

@ -195,9 +195,12 @@ def test_Latex():
# txt = r"https://arxiv.org/abs/2303.08774"
# txt = r"https://arxiv.org/abs/2303.12712"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2303.12712\workfolder"
txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = r"2306.17157" # 这个paper有个input命令文件名大小写错误
# txt = "https://arxiv.org/abs/2205.14135"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
# txt = r"C:\Users\fuqingxu\arxiv_cache\2205.14135\workfolder"
txt = r"2210.03629"
txt = r"2307.04964"
for cookies, cb, hist, msg in (Latex翻译中文并重新编译PDF)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb) # print(cb)
@ -225,6 +228,22 @@ def test_chatglm_finetune():
for cookies, cb, hist, msg in (启动微调)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb)
def test_虚空终端CodeInterpreter():
from crazy_functions.虚空终端CodeInterpreter import 虚空终端CodeInterpreter
txt = 'Convert this dataset to excel.'
plugin_kwargs = {"recently_uploaded_files":"build/assets/iris.csv"}
for cookies, cb, hist, msg in (虚空终端CodeInterpreter)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb)
def test_解析项目源代码炫酷版():
from crazy_functions.解析项目源代码炫酷版 import 解析一个Python项目炫酷版
txt = './'
for cookies, cb, hist, msg in (解析一个Python项目炫酷版)(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
cli_printer.print(cb)
if __name__ == "__main__":
# test_解析一个Python项目()
@ -241,6 +260,8 @@ if __name__ == "__main__":
# test_Langchain知识库()
# test_Langchain知识库读取()
# test_Latex()
test_chatglm_finetune()
# test_chatglm_finetune()
# test_虚空终端CodeInterpreter()
test_解析项目源代码炫酷版()
input("程序完成,回车退出。")
print("退出。")

View File

@ -141,7 +141,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
chatbot, history_array, sys_prompt_array,
refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
handle_token_exceed=True, show_user_at_complete=False,
retry_times_at_unknown_error=2,
retry_times_at_unknown_error=2, callback_fn=None
):
"""
Request GPT model using multiple threads with UI and high efficiency
@ -166,6 +166,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
handle_token_exceed是否自动处理token溢出的情况如果选择自动处理则会在溢出时暴力截断默认开启
show_user_at_complete (bool, optional): (在结束时,把完整输入-输出结果显示在聊天框)
retry_times_at_unknown_error子任务失败时的重试次数
callback_fn: 当信息更新时,在主进程调用的回调函数
输出 Returns:
list: List of GPT model responses 每个子任务的输出汇总如果某个子任务出错response中会携带traceback报错信息方便调试和定位问题。
@ -283,6 +284,9 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
# 在前端打印些好玩的东西
chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
# 回调函数
if callback_fn is not None: callback_fn([mutable[thread_index][0] for thread_index in range(len(futures))])
# 结束了吗?
if all(worker_done):
executor.shutdown()
break

View File

@ -1,320 +1,16 @@
from toolbox import update_ui, update_ui_lastest_msg # 刷新Gradio前端界面
from toolbox import zip_folder, objdump, objload, promote_file_to_downloadzone
from .latex_toolbox import PRESERVE, TRANSFORM
from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
import os, shutil
import re
import numpy as np
pj = os.path.join
"""
========================================================================
Part One
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
========================================================================
"""
PRESERVE = 0
TRANSFORM = 1
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
Add it to preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
"""
========================================================================
Latex Merge File
========================================================================
"""
def 寻找Latex主文件(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个
P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2 通过一些Latex模板中常见但通常不会出现在正文的单词对不同latex源文件扣分取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # 取评分最高者返回
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# 删除整行的空注释
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recrusively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recrusively
P.S. 顺便把CTEX塞进去以支持中文
P.S. 顺便把Latex的注释去除
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
========================================================================
Post process
========================================================================
"""
def mod_inbraket(match):
"""
为啥chatgpt会把cite里面的逗号换成中文逗号呀
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace('', ':') # 前面是中文冒号,后面是英文冒号
str_to_modify = str_to_modify.replace('', ',') # 前面是中文逗号,后面是英文逗号
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# 出问题了,还原部分原文,保证括号正确
final_tex = join_most(final_tex, node_string)
return final_tex
def split_subprocess(txt, project_folder, return_dict, opts):
"""
@ -356,77 +52,9 @@ def split_subprocess(txt, project_folder, return_dict, opts):
text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
root = convert_to_linklist(text, mask)
# 修复括号
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
# 最后一步处理,增强稳健性
root = post_process(root)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# 屏蔽空行和太短的句子
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# 将前后断行符脱离
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# 输出html调试文件用红色标注处保留区PRESERVE用黑色标注转换区TRANSFORM
with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
segment_parts_for_gpt = []
@ -437,7 +65,7 @@ def split_subprocess(txt, project_folder, return_dict, opts):
show_html = node.string.replace('\n','<br/>')
if not node.preserve:
segment_parts_for_gpt.append(node.string)
f.write(f'<p style="color:black;">#{show_html}#</p>')
f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
else:
f.write(f'<p style="color:red;">{show_html}</p>')
node = node.next
@ -448,8 +76,6 @@ def split_subprocess(txt, project_folder, return_dict, opts):
return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
return return_dict
class LatexPaperSplit():
"""
break down latex file to a linked list,
@ -464,18 +90,32 @@ class LatexPaperSplit():
# 请您不要删除或修改这行警告除非您是论文的原作者如果您是论文原作者欢迎加REAME中的QQ联系开发者
self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
def merge_result(self, arr, mode, msg):
def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
"""
Merge the result after the GPT process completed
"""
result_string = ""
p = 0
node_cnt = 0
line_cnt = 0
for node in self.nodes:
if node.preserve:
line_cnt += node.string.count('\n')
result_string += node.string
else:
result_string += fix_content(arr[p], node.string)
p += 1
translated_txt = fix_content(arr[node_cnt], node.string)
begin_line = line_cnt
end_line = line_cnt + translated_txt.count('\n')
# reverse translation if any error
if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
translated_txt = node.string
result_string += translated_txt
node_cnt += 1
line_cnt += translated_txt.count('\n')
if mode == 'translate_zh':
pattern = re.compile(r'\\begin\{abstract\}.*\n')
match = pattern.search(result_string)
@ -490,6 +130,7 @@ class LatexPaperSplit():
result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
return result_string
def split(self, txt, project_folder, opts):
"""
break down latex file to a linked list,
@ -511,7 +152,6 @@ class LatexPaperSplit():
return self.sp
class LatexPaperFileGroup():
"""
use tokenizer to break down text according to max_token_limit
@ -539,7 +179,7 @@ class LatexPaperFileGroup():
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
from ..crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
@ -560,41 +200,14 @@ class LatexPaperFileGroup():
f.write(res)
return manifest
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from .crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
import time, os, re
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_utils import LatexPaperFileGroup, merge_tex_files, LatexPaperSplit, 寻找Latex主文件
from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
# <-------- 寻找主tex文件 ---------->
maintex = 寻找Latex主文件(file_manifest, mode)
maintex = find_main_tex_file(file_manifest, mode)
chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
time.sleep(3)
@ -668,54 +281,51 @@ def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin
# <-------- 写出文件 ---------->
msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}"
final_tex = lps.merge_result(pfg.file_result, mode, msg)
objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
# <-------- 整理结果, 退出 ---------->
chatbot.append((f"完成了吗?", 'GPT结果已输出, 正在编译PDF'))
chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 返回 ---------->
return project_folder + f'/merge_{mode}.tex'
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified):
def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]):
try:
with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
log = f.read()
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
file_lines = f.readlines()
import re
buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
buggy_lines = [int(l) for l in buggy_lines]
buggy_lines = sorted(buggy_lines)
print("removing lines that has errors", buggy_lines)
file_lines.pop(buggy_lines[0]-1)
buggy_line = buggy_lines[0]-1
print("reversing tex line that has errors", buggy_line)
# 重组,逆转出错的段落
if buggy_line not in fixed_line:
fixed_line.append(buggy_line)
lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)
with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
f.writelines(file_lines)
f.write(final_tex)
return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
except:
print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
return False, -1, [-1]
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
import os, time
current_dir = os.getcwd()
n_fix = 1
fixed_line = []
max_try = 32
chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder}如果程序停顿5分钟以上请直接去该路径下取回翻译结果或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
@ -723,6 +333,10 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
while True:
import os
may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
shutil.copyfile(may_exist_bbl, target_bbl)
# https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
@ -756,7 +370,6 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
# <---------- 检查结果 ----------->
results_ = ""
original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
@ -773,9 +386,19 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
if modified_pdf_success:
yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 即将退出 ...', chatbot, history) # 刷新Gradio前端界面
result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
if os.path.exists(pj(work_folder, '..', 'translation')):
shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
# 将两个PDF拼接
if original_pdf_success:
try:
from .latex_toolbox import merge_pdfs
concat_pdf = pj(work_folder_modified, f'comparison.pdf')
merge_pdfs(origin_pdf, result_pdf, concat_pdf)
promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
except Exception as e:
pass
return True # 成功啦
else:
if n_fix>=max_try: break
@ -787,6 +410,7 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
tex_name_pure=f'{main_file_modified}',
n_fix=n_fix,
work_folder_modified=work_folder_modified,
fixed_line=fixed_line
)
yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
if not can_retry: break
@ -794,4 +418,29 @@ def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_f
return False # 失败啦
def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
# write html
try:
import shutil
from ..crazy_utils import construct_html
from toolbox import gen_time_str
ch = construct_html()
orig = ""
trans = ""
final = []
for c,r in zip(sp_file_contents, sp_file_result):
final.append(c)
final.append(r)
for i, k in enumerate(final):
if i%2==0:
orig = k
if i%2==1:
trans = k
ch.add_row(a=orig, b=trans)
create_report_file_name = f"{gen_time_str()}.trans.html"
ch.save_file(create_report_file_name)
shutil.copyfile(pj('./gpt_log/', create_report_file_name), pj(project_folder, create_report_file_name))
promote_file_to_downloadzone(file=f'./gpt_log/{create_report_file_name}', chatbot=chatbot)
except:
from toolbox import trimmed_format_exc
print('writing html result failed:', trimmed_format_exc())

View File

@ -0,0 +1,456 @@
import os, shutil
import re
import numpy as np
PRESERVE = 0
TRANSFORM = 1
pj = os.path.join
class LinkedListNode():
"""
Linked List Node
"""
def __init__(self, string, preserve=True) -> None:
self.string = string
self.preserve = preserve
self.next = None
self.range = None
# self.begin_line = 0
# self.begin_char = 0
def convert_to_linklist(text, mask):
root = LinkedListNode("", preserve=True)
current_node = root
for c, m, i in zip(text, mask, range(len(text))):
if (m==PRESERVE and current_node.preserve) \
or (m==TRANSFORM and not current_node.preserve):
# add
current_node.string += c
else:
current_node.next = LinkedListNode(c, preserve=(m==PRESERVE))
current_node = current_node.next
return root
def post_process(root):
# 修复括号
node = root
while True:
string = node.string
if node.preserve:
node = node.next
if node is None: break
continue
def break_check(string):
str_stack = [""] # (lv, index)
for i, c in enumerate(string):
if c == '{':
str_stack.append('{')
elif c == '}':
if len(str_stack) == 1:
print('stack fix')
return i
str_stack.pop(-1)
else:
str_stack[-1] += c
return -1
bp = break_check(string)
if bp == -1:
pass
elif bp == 0:
node.string = string[:1]
q = LinkedListNode(string[1:], False)
q.next = node.next
node.next = q
else:
node.string = string[:bp]
q = LinkedListNode(string[bp:], False)
q.next = node.next
node.next = q
node = node.next
if node is None: break
# 屏蔽空行和太短的句子
node = root
while True:
if len(node.string.strip('\n').strip(''))==0: node.preserve = True
if len(node.string.strip('\n').strip(''))<42: node.preserve = True
node = node.next
if node is None: break
node = root
while True:
if node.next and node.preserve and node.next.preserve:
node.string += node.next.string
node.next = node.next.next
node = node.next
if node is None: break
# 将前后断行符脱离
node = root
prev_node = None
while True:
if not node.preserve:
lstriped_ = node.string.lstrip().lstrip('\n')
if (prev_node is not None) and (prev_node.preserve) and (len(lstriped_)!=len(node.string)):
prev_node.string += node.string[:-len(lstriped_)]
node.string = lstriped_
rstriped_ = node.string.rstrip().rstrip('\n')
if (node.next is not None) and (node.next.preserve) and (len(rstriped_)!=len(node.string)):
node.next.string = node.string[len(rstriped_):] + node.next.string
node.string = rstriped_
# =====
prev_node = node
node = node.next
if node is None: break
# 标注节点的行数范围
node = root
n_line = 0
expansion = 2
while True:
n_l = node.string.count('\n')
node.range = [n_line-expansion, n_line+n_l+expansion] # 失败时,扭转的范围
n_line = n_line+n_l
node = node.next
if node is None: break
return root
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex segmentation with a binary mask (PRESERVE=0, TRANSFORM=1)
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def set_forbidden_text(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper
e.g. with pattern = r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}"
you can mask out (mask = PRESERVE so that text become untouchable for GPT)
everything between "\begin{equation}" and "\end{equation}"
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
mask[res.span()[0]:res.span()[1]] = PRESERVE
return text, mask
def reverse_forbidden_text(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\begin{abstract} blablablablablabla. \end{abstract}
"""
if isinstance(pattern, list): pattern = '|'.join(pattern)
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
if not forbid_wrapper:
mask[res.span()[0]:res.span()[1]] = TRANSFORM
else:
mask[res.regs[0][0]: res.regs[1][0]] = PRESERVE # '\\begin{abstract}'
mask[res.regs[1][0]: res.regs[1][1]] = TRANSFORM # abstract
mask[res.regs[1][1]: res.regs[0][1]] = PRESERVE # abstract
return text, mask
def set_forbidden_text_careful_brace(text, mask, pattern, flags=0):
"""
Add a preserve text area in this paper (text become untouchable for GPT).
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = -1
p = begin = end = res.regs[0][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p+1
mask[begin:end] = PRESERVE
return text, mask
def reverse_forbidden_text_careful_brace(text, mask, pattern, flags=0, forbid_wrapper=True):
"""
Move area out of preserve area (make text editable for GPT)
count the number of the braces so as to catch compelete text area.
e.g.
\caption{blablablablabla\texbf{blablabla}blablabla.}
"""
pattern_compile = re.compile(pattern, flags)
for res in pattern_compile.finditer(text):
brace_level = 0
p = begin = end = res.regs[1][0]
for _ in range(1024*16):
if text[p] == '}' and brace_level == 0: break
elif text[p] == '}': brace_level -= 1
elif text[p] == '{': brace_level += 1
p += 1
end = p
mask[begin:end] = TRANSFORM
if forbid_wrapper:
mask[res.regs[0][0]:begin] = PRESERVE
mask[end:res.regs[0][1]] = PRESERVE
return text, mask
def set_forbidden_text_begin_end(text, mask, pattern, flags=0, limit_n_lines=42):
"""
Find all \begin{} ... \end{} text block that with less than limit_n_lines lines.
Add it to preserve area
"""
pattern_compile = re.compile(pattern, flags)
def search_with_line_limit(text, mask):
for res in pattern_compile.finditer(text):
cmd = res.group(1) # begin{what}
this = res.group(2) # content between begin and end
this_mask = mask[res.regs[2][0]:res.regs[2][1]]
white_list = ['document', 'abstract', 'lemma', 'definition', 'sproof',
'em', 'emph', 'textit', 'textbf', 'itemize', 'enumerate']
if (cmd in white_list) or this.count('\n') >= limit_n_lines: # use a magical number 42
this, this_mask = search_with_line_limit(this, this_mask)
mask[res.regs[2][0]:res.regs[2][1]] = this_mask
else:
mask[res.regs[0][0]:res.regs[0][1]] = PRESERVE
return text, mask
return search_with_line_limit(text, mask)
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Latex Merge File
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def find_main_tex_file(file_manifest, mode):
"""
在多Tex文档中寻找主文件必须包含documentclass返回找到的第一个。
P.S. 但愿没人把latex模板放在里面传进来 (6.25 加入判定latex模板的代码)
"""
canidates = []
for texf in file_manifest:
if os.path.basename(texf).startswith('merge'):
continue
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
if r'\documentclass' in file_content:
canidates.append(texf)
else:
continue
if len(canidates) == 0:
raise RuntimeError('无法找到一个主Tex文件包含documentclass关键字')
elif len(canidates) == 1:
return canidates[0]
else: # if len(canidates) >= 2 通过一些Latex模板中常见但通常不会出现在正文的单词对不同latex源文件扣分取评分最高者返回
canidates_score = []
# 给出一些判定模板文档的词作为扣分项
unexpected_words = ['\LaTeX', 'manuscript', 'Guidelines', 'font', 'citations', 'rejected', 'blind review', 'reviewers']
expected_words = ['\input', '\ref', '\cite']
for texf in canidates:
canidates_score.append(0)
with open(texf, 'r', encoding='utf8', errors='ignore') as f:
file_content = f.read()
for uw in unexpected_words:
if uw in file_content:
canidates_score[-1] -= 1
for uw in expected_words:
if uw in file_content:
canidates_score[-1] += 1
select = np.argmax(canidates_score) # 取评分最高者返回
return canidates[select]
def rm_comments(main_file):
new_file_remove_comment_lines = []
for l in main_file.splitlines():
# 删除整行的空注释
if l.lstrip().startswith("%"):
pass
else:
new_file_remove_comment_lines.append(l)
main_file = '\n'.join(new_file_remove_comment_lines)
# main_file = re.sub(r"\\include{(.*?)}", r"\\input{\1}", main_file) # 将 \include 命令转换为 \input 命令
main_file = re.sub(r'(?<!\\)%.*', '', main_file) # 使用正则表达式查找半行注释, 并替换为空字符串
return main_file
def find_tex_file_ignore_case(fp):
dir_name = os.path.dirname(fp)
base_name = os.path.basename(fp)
if not base_name.endswith('.tex'): base_name+='.tex'
if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
# go case in-sensitive
import glob
for f in glob.glob(dir_name+'/*.tex'):
base_name_s = os.path.basename(fp)
if base_name_s.lower() == base_name.lower(): return f
return None
def merge_tex_files_(project_foler, main_file, mode):
"""
Merge Tex project recrusively
"""
main_file = rm_comments(main_file)
for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
f = s.group(1)
fp = os.path.join(project_foler, f)
fp = find_tex_file_ignore_case(fp)
if fp:
with open(fp, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
else:
raise RuntimeError(f'找不到{fp}Tex源文件缺失')
c = merge_tex_files_(project_foler, c, mode)
main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
return main_file
def merge_tex_files(project_foler, main_file, mode):
"""
Merge Tex project recrusively
P.S. 顺便把CTEX塞进去以支持中文
P.S. 顺便把Latex的注释去除
"""
main_file = merge_tex_files_(project_foler, main_file, mode)
main_file = rm_comments(main_file)
if mode == 'translate_zh':
# find paper documentclass
pattern = re.compile(r'\\documentclass.*\n')
match = pattern.search(main_file)
assert match is not None, "Cannot find documentclass statement!"
position = match.end()
add_ctex = '\\usepackage{ctex}\n'
add_url = '\\usepackage{url}\n' if '{url}' not in main_file else ''
main_file = main_file[:position] + add_ctex + add_url + main_file[position:]
# fontset=windows
import platform
main_file = re.sub(r"\\documentclass\[(.*?)\]{(.*?)}", r"\\documentclass[\1,fontset=windows,UTF8]{\2}",main_file)
main_file = re.sub(r"\\documentclass{(.*?)}", r"\\documentclass[fontset=windows,UTF8]{\1}",main_file)
# find paper abstract
pattern_opt1 = re.compile(r'\\begin\{abstract\}.*\n')
pattern_opt2 = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
match_opt1 = pattern_opt1.search(main_file)
match_opt2 = pattern_opt2.search(main_file)
assert (match_opt1 is not None) or (match_opt2 is not None), "Cannot find paper abstract section!"
return main_file
"""
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
Post process
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
"""
def mod_inbraket(match):
"""
为啥chatgpt会把cite里面的逗号换成中文逗号呀
"""
# get the matched string
cmd = match.group(1)
str_to_modify = match.group(2)
# modify the matched string
str_to_modify = str_to_modify.replace('', ':') # 前面是中文冒号,后面是英文冒号
str_to_modify = str_to_modify.replace('', ',') # 前面是中文逗号,后面是英文逗号
# str_to_modify = 'BOOM'
return "\\" + cmd + "{" + str_to_modify + "}"
def fix_content(final_tex, node_string):
"""
Fix common GPT errors to increase success rate
"""
final_tex = re.sub(r"(?<!\\)%", "\\%", final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\ \{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\\ ([a-z]{2,10})\{", r"\\\1{", string=final_tex)
final_tex = re.sub(r"\\([a-z]{2,10})\{([^\}]*?)\}", mod_inbraket, string=final_tex)
if "Traceback" in final_tex and "[Local Message]" in final_tex:
final_tex = node_string # 出问题了,还原原文
if node_string.count('\\begin') != final_tex.count('\\begin'):
final_tex = node_string # 出问题了,还原原文
if node_string.count('\_') > 0 and node_string.count('\_') > final_tex.count('\_'):
# walk and replace any _ without \
final_tex = re.sub(r"(?<!\\)_", "\\_", final_tex)
def compute_brace_level(string):
# this function count the number of { and }
brace_level = 0
for c in string:
if c == "{": brace_level += 1
elif c == "}": brace_level -= 1
return brace_level
def join_most(tex_t, tex_o):
# this function join translated string and original string when something goes wrong
p_t = 0
p_o = 0
def find_next(string, chars, begin):
p = begin
while p < len(string):
if string[p] in chars: return p, string[p]
p += 1
return None, None
while True:
res1, char = find_next(tex_o, ['{','}'], p_o)
if res1 is None: break
res2, char = find_next(tex_t, [char], p_t)
if res2 is None: break
p_o = res1 + 1
p_t = res2 + 1
return tex_t[:p_t] + tex_o[p_o:]
if compute_brace_level(final_tex) != compute_brace_level(node_string):
# 出问题了,还原部分原文,保证括号正确
final_tex = join_most(final_tex, node_string)
return final_tex
def compile_latex_with_timeout(command, cwd, timeout=60):
import subprocess
process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd)
try:
stdout, stderr = process.communicate(timeout=timeout)
except subprocess.TimeoutExpired:
process.kill()
stdout, stderr = process.communicate()
print("Process timed out!")
return False
return True
def merge_pdfs(pdf1_path, pdf2_path, output_path):
import PyPDF2
Percent = 0.8
# Open the first PDF file
with open(pdf1_path, 'rb') as pdf1_file:
pdf1_reader = PyPDF2.PdfFileReader(pdf1_file)
# Open the second PDF file
with open(pdf2_path, 'rb') as pdf2_file:
pdf2_reader = PyPDF2.PdfFileReader(pdf2_file)
# Create a new PDF file to store the merged pages
output_writer = PyPDF2.PdfFileWriter()
# Determine the number of pages in each PDF file
num_pages = max(pdf1_reader.numPages, pdf2_reader.numPages)
# Merge the pages from the two PDF files
for page_num in range(num_pages):
# Add the page from the first PDF file
if page_num < pdf1_reader.numPages:
page1 = pdf1_reader.getPage(page_num)
else:
page1 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Add the page from the second PDF file
if page_num < pdf2_reader.numPages:
page2 = pdf2_reader.getPage(page_num)
else:
page2 = PyPDF2.PageObject.createBlankPage(pdf1_reader)
# Create a new empty page with double width
new_page = PyPDF2.PageObject.createBlankPage(
width = int(int(page1.mediaBox.getWidth()) + int(page2.mediaBox.getWidth()) * Percent),
height = max(page1.mediaBox.getHeight(), page2.mediaBox.getHeight())
)
new_page.mergeTranslatedPage(page1, 0, 0)
new_page.mergeTranslatedPage(page2, int(int(page1.mediaBox.getWidth())-int(page2.mediaBox.getWidth())* (1-Percent)), 0)
output_writer.addPage(new_page)
# Save the merged PDF file
with open(output_path, 'wb') as output_file:
output_writer.write(output_file)

View File

@ -1,87 +1,70 @@
from toolbox import CatchException, update_ui, gen_time_str
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping
import copy, json
prompt = """
I have to achieve some functionalities by calling one of the functions below.
Your job is to find the correct funtion to use to satisfy my requirement,
and then write python code to call this function with correct parameters.
These are functions you are allowed to choose from:
1.
功能描述: 总结音视频内容
调用函数: ConcludeAudioContent(txt, llm_kwargs)
参数说明:
txt: 音频文件的路径
llm_kwargs: 模型参数, 永远给定None
2.
功能描述: 将每次对话记录写入Markdown格式的文件中
调用函数: WriteMarkdown()
3.
功能描述: 将指定目录下的PDF文件从英文翻译成中文
调用函数: BatchTranslatePDFDocuments_MultiThreaded(txt, llm_kwargs)
参数说明:
txt: PDF文件所在的路径
llm_kwargs: 模型参数, 永远给定None
4.
功能描述: 根据文本使用GPT模型生成相应的图像
调用函数: ImageGeneration(txt, llm_kwargs)
参数说明:
txt: 图像生成所用到的提示文本
llm_kwargs: 模型参数, 永远给定None
5.
功能描述: 对输入的word文档进行摘要生成
调用函数: SummarizingWordDocuments(input_path, output_path)
参数说明:
input_path: 待处理的word文档路径
output_path: 摘要生成后的文档路径
You should always anwser with following format:
----------------
Code:
```
class AutoAcademic(object):
def __init__(self):
self.selected_function = "FILL_CORRECT_FUNCTION_HERE" # e.g., "GenerateImage"
self.txt = "FILL_MAIN_PARAMETER_HERE" # e.g., "荷叶上的蜻蜓"
self.llm_kwargs = None
```
Explanation:
只有GenerateImage和生成图像相关, 因此选择GenerateImage函数。
----------------
Now, this is my requirement:
"""
def get_fn_lib():
return {
"BatchTranslatePDFDocuments_MultiThreaded": ("crazy_functions.批量翻译PDF文档_多线程", "批量翻译PDF文档"),
"SummarizingWordDocuments": ("crazy_functions.总结word文档", "总结word文档"),
"ImageGeneration": ("crazy_functions.图片生成", "图片生成"),
"TranslateMarkdownFromEnglishToChinese": ("crazy_functions.批量Markdown翻译", "Markdown中译英"),
"SummaryAudioVideo": ("crazy_functions.总结音视频", "总结音视频"),
"BatchTranslatePDFDocuments_MultiThreaded": {
"module": "crazy_functions.批量翻译PDF文档_多线程",
"function": "批量翻译PDF文档",
"description": "Translate PDF Documents",
"arg_1_description": "A path containing pdf files.",
},
"SummarizingWordDocuments": {
"module": "crazy_functions.总结word文档",
"function": "总结word文档",
"description": "Summarize Word Documents",
"arg_1_description": "A path containing Word files.",
},
"ImageGeneration": {
"module": "crazy_functions.图片生成",
"function": "图片生成",
"description": "Generate a image that satisfies some description.",
"arg_1_description": "Descriptions about the image to be generated.",
},
"TranslateMarkdownFromEnglishToChinese": {
"module": "crazy_functions.批量Markdown翻译",
"function": "Markdown中译英",
"description": "Translate Markdown Documents from English to Chinese.",
"arg_1_description": "A path containing Markdown files.",
},
"SummaryAudioVideo": {
"module": "crazy_functions.总结音视频",
"function": "总结音视频",
"description": "Get text from a piece of audio and summarize this audio.",
"arg_1_description": "A path containing audio files.",
},
}
functions = [
{
"name": k,
"description": v['description'],
"parameters": {
"type": "object",
"properties": {
"plugin_arg_1": {
"type": "string",
"description": v['arg_1_description'],
},
},
"required": ["plugin_arg_1"],
},
} for k, v in get_fn_lib().items()
]
def inspect_dependency(chatbot, history):
return True
def eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
import subprocess, sys, os, shutil, importlib
with open('gpt_log/void_terminal_runtime.py', 'w', encoding='utf8') as f:
f.write(code)
import importlib
try:
AutoAcademic = getattr(importlib.import_module('gpt_log.void_terminal_runtime', 'AutoAcademic'), 'AutoAcademic')
# importlib.reload(AutoAcademic)
auto_dict = AutoAcademic()
selected_function = auto_dict.selected_function
txt = auto_dict.txt
fp, fn = get_fn_lib()[selected_function]
tmp = get_fn_lib()[code['name']]
fp, fn = tmp['module'], tmp['function']
fn_plugin = getattr(importlib.import_module(fp, fn), fn)
yield from fn_plugin(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
arg = json.loads(code['arguments'])['plugin_arg_1']
yield from fn_plugin(arg, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
except:
from toolbox import trimmed_format_exc
chatbot.append(["执行错误", f"\n```\n{trimmed_format_exc()}\n```\n"])
@ -110,22 +93,27 @@ def 终端(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_
history = []
# 基本信息:功能、贡献者
chatbot.append(["函数插件功能?", "根据自然语言执行插件命令, 作者: binary-husky, 插件初始化中 ..."])
chatbot.append(["虚空终端插件功能?", "根据自然语言的描述, 执行任意插件命令."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# # 尝试导入依赖, 如果缺少依赖, 则给出安装建议
# dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面
# if not dep_ok: return
# 输入
i_say = prompt + txt
i_say = txt
# 开始
llm_kwargs_function_call = copy.deepcopy(llm_kwargs)
llm_kwargs_function_call['llm_model'] = 'gpt-call-fn' # 修改调用函数
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=[],
sys_prompt=""
llm_kwargs=llm_kwargs_function_call, chatbot=chatbot, history=[],
sys_prompt=functions
)
# 将代码转为动画
code = get_code_block(gpt_say)
yield from eval_code(code, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
res = json.loads(gpt_say)['choices'][0]
if res['finish_reason'] == 'function_call':
code = json.loads(gpt_say)['choices'][0]
yield from eval_code(code['message']['function_call'], llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port)
else:
chatbot.append(["无法调用相关功能", res])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

View File

@ -0,0 +1,213 @@
from collections.abc import Callable, Iterable, Mapping
from typing import Any
from toolbox import CatchException, update_ui, gen_time_str, trimmed_format_exc, promote_file_to_downloadzone, clear_file_downloadzone
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
from .crazy_utils import input_clipping, try_install_deps
from multiprocessing import Process, Pipe
import os
templete = """
```python
import ... # Put dependencies here, e.g. import numpy as np
class TerminalFunction(object): # Do not change the name of the class, The name of the class must be `TerminalFunction`
def run(self, path): # The name of the function must be `run`, it takes only a positional argument.
# rewrite the function you have just written here
...
return generated_file_path
```
"""
def inspect_dependency(chatbot, history):
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return True
def get_code_block(reply):
import re
pattern = r"```([\s\S]*?)```" # regex pattern to match code blocks
matches = re.findall(pattern, reply) # find all code blocks in text
if len(matches) == 1:
return matches[0].strip('python') # code block
for match in matches:
if 'class TerminalFunction' in match:
return match.strip('python') # code block
raise RuntimeError("GPT is not generating proper code.")
def gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history):
# 输入
prompt_compose = [
f'Your job:\n'
f'1. write a single Python function, which takes a path of a `{file_type}` file as the only argument and returns a `string` containing the result of analysis or the path of generated files. \n',
f"2. You should write this function to perform following task: " + txt + "\n",
f"3. Wrap the output python function with markdown codeblock."
]
i_say = "".join(prompt_compose)
demo = []
# 第一步
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=i_say,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=demo,
sys_prompt= r"You are a programmer."
)
history.extend([i_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# 第二步
prompt_compose = [
"If previous stage is successful, rewrite the function you have just written to satisfy following templete: \n",
templete
]
i_say = "".join(prompt_compose); inputs_show_user = "If previous stage is successful, rewrite the function you have just written to satisfy executable templete. "
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=inputs_show_user,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt= r"You are a programmer."
)
code_to_return = gpt_say
history.extend([i_say, gpt_say])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
# # 第三步
# i_say = "Please list to packages to install to run the code above. Then show me how to use `try_install_deps` function to install them."
# i_say += 'For instance. `try_install_deps(["opencv-python", "scipy", "numpy"])`'
# installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
# inputs=i_say, inputs_show_user=inputs_show_user,
# llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
# sys_prompt= r"You are a programmer."
# )
# # # 第三步
# i_say = "Show me how to use `pip` to install packages to run the code above. "
# i_say += 'For instance. `pip install -r opencv-python scipy numpy`'
# installation_advance = yield from request_gpt_model_in_new_thread_with_ui_alive(
# inputs=i_say, inputs_show_user=i_say,
# llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
# sys_prompt= r"You are a programmer."
# )
installation_advance = ""
return code_to_return, installation_advance, txt, file_type, llm_kwargs, chatbot, history
def make_module(code):
module_file = 'gpt_fn_' + gen_time_str().replace('-','_')
with open(f'gpt_log/{module_file}.py', 'w', encoding='utf8') as f:
f.write(code)
def get_class_name(class_string):
import re
# Use regex to extract the class name
class_name = re.search(r'class (\w+)\(', class_string).group(1)
return class_name
class_name = get_class_name(code)
return f"gpt_log.{module_file}->{class_name}"
def init_module_instance(module):
import importlib
module_, class_ = module.split('->')
init_f = getattr(importlib.import_module(module_), class_)
return init_f()
def for_immediate_show_off_when_possible(file_type, fp, chatbot):
if file_type in ['png', 'jpg']:
image_path = os.path.abspath(fp)
chatbot.append(['这是一张图片, 展示如下:',
f'本地文件地址: <br/>`{image_path}`<br/>'+
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
])
return chatbot
def subprocess_worker(instance, file_path, return_dict):
return_dict['result'] = instance.run(file_path)
@CatchException
def 虚空终端CodeInterpreter(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数,暂时没有用武之地
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
# 清空历史,以免输入溢出
history = []; clear_file_downloadzone(chatbot)
# 基本信息:功能、贡献者
chatbot.append([
"函数插件功能?",
"CodeInterpreter开源版, 此插件处于开发阶段, 建议暂时不要使用, 作者: binary-husky, 插件初始化中 ..."
])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 尝试导入依赖, 如果缺少依赖, 则给出安装建议
dep_ok = yield from inspect_dependency(chatbot=chatbot, history=history) # 刷新界面
if not dep_ok: return
# 读取文件
if ("recently_uploaded_files" in plugin_kwargs) and (plugin_kwargs["recently_uploaded_files"] == ""): plugin_kwargs.pop("recently_uploaded_files")
recently_uploaded_files = plugin_kwargs.get("recently_uploaded_files", None)
file_path = recently_uploaded_files[-1]
file_type = file_path.split('.')[-1]
# 粗心检查
if 'private_upload' in txt:
chatbot.append([
"...",
f"请在输入框内填写需求,然后再次点击该插件(文件路径 {file_path} 已经被记忆)"
])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 开始干正事
for j in range(5): # 最多重试5次
try:
code, installation_advance, txt, file_type, llm_kwargs, chatbot, history = \
yield from gpt_interact_multi_step(txt, file_type, llm_kwargs, chatbot, history)
code = get_code_block(code)
res = make_module(code)
instance = init_module_instance(res)
break
except Exception as e:
chatbot.append([f"{j}次代码生成尝试,失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# 代码生成结束, 开始执行
try:
import multiprocessing
manager = multiprocessing.Manager()
return_dict = manager.dict()
p = multiprocessing.Process(target=subprocess_worker, args=(instance, file_path, return_dict))
# only has 10 seconds to run
p.start(); p.join(timeout=10)
if p.is_alive(): p.terminate(); p.join()
p.close()
res = return_dict['result']
# res = instance.run(file_path)
except Exception as e:
chatbot.append(["执行失败了", f"错误追踪\n```\n{trimmed_format_exc()}\n```\n"])
# chatbot.append(["如果是缺乏依赖,请参考以下建议", installation_advance])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 顺利完成,收尾
res = str(res)
if os.path.exists(res):
chatbot.append(["执行成功了,结果是一个有效文件", "结果:" + res])
new_file_path = promote_file_to_downloadzone(res, chatbot=chatbot)
chatbot = for_immediate_show_off_when_possible(file_type, new_file_path, chatbot)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
else:
chatbot.append(["执行成功了,结果是一个字符串", "结果:" + res])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
"""
测试:
裁剪图像,保留下半部分
交换图像的蓝色通道和红色通道
将图像转为灰度图像
将csv文件转excel表格
"""

View File

@ -0,0 +1,137 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
from toolbox import objdump, objload
from .crazy_utils import input_clipping
class ThreeJSPlot():
def __init__(self) -> None:
self.files = None
def read_files(self, files):
self.files = files
def launch_render_interface(self):
from vhmap.mcom import mcom
self.visual_bridge = mcom()
def 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
import os, copy
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
msg = '正常'
summary_batch_isolation = True
inputs_array = []
inputs_show_user_array = []
history_array = []
sys_prompt_array = []
report_part_1 = []
assert len(file_manifest) <= 512, "源文件太多超过512个, 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
############################## <第一步,逐个文件分析,多线程> ##################################
for index, fp in enumerate(file_manifest):
# 读取文件
with open(fp, 'r', encoding='utf-8', errors='replace') as f:
file_content = f.read()
prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)},文件代码是 ```{file_content}```'
i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
# 装载请求内容
inputs_array.append(i_say)
inputs_show_user_array.append(i_say_show_user)
history_array.append([])
sys_prompt_array.append("你是一个程序架构分析师,正在分析一个源代码项目。你的回答必须简单明了。")
def callback_when_intel_update(gpt_reply_array):
objdump((gpt_reply_array, file_manifest))
return
# 文件读取完成对每一个源代码文件生成一个请求线程发送到chatgpt进行分析
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array = inputs_array,
inputs_show_user_array = inputs_show_user_array,
history_array = history_array,
sys_prompt_array = sys_prompt_array,
llm_kwargs = llm_kwargs,
chatbot = chatbot,
show_user_at_complete = True,
callback_fn=callback_when_intel_update,
)
# 全部文件解析完成,结果写入文件,准备对工程源代码进行汇总分析
report_part_1 = copy.deepcopy(gpt_response_collection)
history_to_return = report_part_1
res = write_results_to_file(report_part_1)
chatbot.append(("完成?", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
############################## <第二步,综合,单线程,分组+迭代处理> ##################################
############################## <END> ##################################
history_to_return.extend([])
res = write_results_to_file(history_to_return)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history_to_return) # 刷新界面
@CatchException
def 解析一个Python项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
import glob, os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.py', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析任意code项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
txt_pattern = plugin_kwargs.get("advanced_arg")
txt_pattern = txt_pattern.replace("", ",")
# 将要匹配的模式(例如: *.c, *.cpp, *.py, config.toml)
pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
if not pattern_include: pattern_include = ["*"] # 不输入即全部匹配
# 将要忽略匹配的文件后缀(例如: ^*.c, ^*.cpp, ^*.py)
pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # 避免解析压缩文件
# 将要忽略匹配的文件名(例如: ^README.md)
pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", "\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
# 生成正则表达式
pattern_except = '/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
history.clear()
import glob, os, re
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 若上传压缩文件, 先寻找到解压的文件夹路径, 从而避免解析压缩文件
maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
extract_folder_path = maybe_dir[0]
else:
extract_folder_path = project_folder
# 按输入的匹配模式寻找上传的非压缩文件和已解压的文件
file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@ -22,8 +22,10 @@ def main():
# 问询记录, python 版本建议3.9+(越新越好)
import logging, uuid
os.makedirs("gpt_log", exist_ok=True)
try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
# Disable logging output from the 'httpx' logger
logging.getLogger("httpx").setLevel(logging.WARNING)
print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
# 一些普通功能模块
@ -158,7 +160,7 @@ def main():
click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
cancel_handles.append(click_handle)
# 文件上传区接收文件后与chatbot的互动
file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes], [chatbot, txt, txt2])
file_upload.upload(on_file_uploaded, [cookies, file_upload, chatbot, txt, txt2, checkboxes], [cookies, chatbot, txt, txt2])
# 函数插件-固定按钮区
for k in crazy_fns:
if not crazy_fns[k].get("AsButton", True): continue

View File

@ -248,7 +248,6 @@ if "moss" in AVAIL_LLM_MODELS:
if "stack-claude" in AVAIL_LLM_MODELS:
from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
from .bridge_stackclaude import predict as claude_ui
# claude
model_info.update({
"stack-claude": {
"fn_with_ui": claude_ui,
@ -263,7 +262,6 @@ if "newbing-free" in AVAIL_LLM_MODELS:
try:
from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
from .bridge_newbingfree import predict as newbingfree_ui
# claude
model_info.update({
"newbing-free": {
"fn_with_ui": newbingfree_ui,
@ -280,7 +278,6 @@ if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free
try:
from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
from .bridge_newbingfree import predict as newbingfree_ui
# claude
model_info.update({
"newbing": {
"fn_with_ui": newbingfree_ui,
@ -297,7 +294,6 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
try:
from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
from .bridge_chatglmft import predict as chatglmft_ui
# claude
model_info.update({
"chatglmft": {
"fn_with_ui": chatglmft_ui,
@ -310,7 +306,22 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
})
except:
print(trimmed_format_exc())
if "internlm" in AVAIL_LLM_MODELS:
try:
from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
from .bridge_internlm import predict as internlm_ui
model_info.update({
"internlm": {
"fn_with_ui": internlm_ui,
"fn_without_ui": internlm_noui,
"endpoint": None,
"max_token": 4096,
"tokenizer": tokenizer_gpt35,
"token_cnt": get_token_num_gpt35,
}
})
except:
print(trimmed_format_exc())
def LLM_CATCH_EXCEPTION(f):
"""

View File

@ -174,9 +174,10 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
chunk = next(stream_response)
except StopIteration:
# 非OpenAI官方接口的出现这样的报错OpenAI和API2D不会走这里
from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk.decode())}")
yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk.decode()) # 刷新界面
chunk_decoded = chunk.decode()
error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
return
# print(chunk.decode()[6:])
@ -187,7 +188,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
if chunk:
try:
chunk_decoded = chunk.decode()
# 前者API2D的
# 前者API2D的结束条件后者是OPENAI的结束条件
if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
# 判定为数据流的结束gpt_replying_buffer也写完了
logging.info(f'[response] {gpt_replying_buffer}')
@ -200,13 +201,18 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
history[-1] = gpt_replying_buffer
chatbot[-1] = (history[-2], history[-1])
yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
except Exception as e:
traceback.print_exc()
yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
chunk = get_full_error(chunk, stream_response)
chunk_decoded = chunk.decode()
error_msg = chunk_decoded
chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
print(error_msg)
return
def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
from .bridge_all import model_info
openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
if "reduce the length" in error_msg:
if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入history[-2] 是本次输入, history[-1] 是本次输出
@ -232,8 +238,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
from toolbox import regular_txt_to_markdown
tb_str = '```\n' + trimmed_format_exc() + '```'
chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
return
return chatbot, history
def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
"""

View File

@ -0,0 +1,315 @@
from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf, Singleton
from multiprocessing import Process, Pipe
model_name = "InternLM"
cmd_to_install = "`pip install ???`"
load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存CPU或显存GPU也许会导致低配计算机卡死 ……"
def try_to_import_special_deps():
import sentencepiece
user_prompt = "<|User|>:{user}<eoh>\n"
robot_prompt = "<|Bot|>:{robot}<eoa>\n"
cur_query_prompt = "<|User|>:{user}<eoh>\n<|Bot|>:"
def combine_history(prompt, hist):
messages = hist
total_prompt = ""
for message in messages:
cur_content = message
cur_prompt = user_prompt.replace("{user}", cur_content[0])
total_prompt += cur_prompt
cur_prompt = robot_prompt.replace("{robot}", cur_content[1])
total_prompt += cur_prompt
total_prompt = total_prompt + cur_query_prompt.replace("{user}", prompt)
return total_prompt
@Singleton
class GetInternlmHandle(Process):
def __init__(self):
# ⭐主进程执行
super().__init__(daemon=True)
self.parent, self.child = Pipe()
self._model = None
self._tokenizer = None
self.info = ""
self.success = True
self.check_dependency()
self.start()
self.threadLock = threading.Lock()
def ready(self):
# ⭐主进程执行
return self._model is not None
def load_model_and_tokenizer(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
device, = get_conf('LOCAL_MODEL_DEVICE')
if self._model is None:
tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
if device=='cpu':
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
else:
model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()
model = model.eval()
return model, tokenizer
def llm_stream_generator(self, **kwargs):
import torch
import logging
import copy
import warnings
import torch.nn as nn
from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
def adaptor():
model = self._model
tokenizer = self._tokenizer
prompt = kwargs['query']
max_length = kwargs['max_length']
top_p = kwargs['top_p']
temperature = kwargs['temperature']
history = kwargs['history']
real_prompt = combine_history(prompt, history)
return model, tokenizer, real_prompt, max_length, top_p, temperature
model, tokenizer, prompt, max_length, top_p, temperature = adaptor()
prefix_allowed_tokens_fn = None
logits_processor = None
stopping_criteria = None
additional_eos_token_id = 103028
generation_config = None
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
# 🏃‍♂️🏃‍♂️🏃‍♂️ https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25
inputs = tokenizer([prompt], padding=True, return_tensors="pt")
input_length = len(inputs["input_ids"][0])
for k, v in inputs.items():
inputs[k] = v.cuda()
input_ids = inputs["input_ids"]
batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
if generation_config is None:
generation_config = model.generation_config
generation_config = copy.deepcopy(generation_config)
model_kwargs = generation_config.update(**kwargs)
bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
if isinstance(eos_token_id, int):
eos_token_id = [eos_token_id]
if additional_eos_token_id is not None:
eos_token_id.append(additional_eos_token_id)
has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
if has_default_max_length and generation_config.max_new_tokens is None:
warnings.warn(
f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
"This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
" recommend using `max_new_tokens` to control the maximum length of the generation.",
UserWarning,
)
elif generation_config.max_new_tokens is not None:
generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
if not has_default_max_length:
logging.warn(
f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
"Please refer to the documentation for more information. "
"(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
UserWarning,
)
if input_ids_seq_length >= generation_config.max_length:
input_ids_string = "input_ids"
logging.warning(
f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
" increasing `max_new_tokens`."
)
# 2. Set generation parameters if not already defined
logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
logits_processor = model._get_logits_processor(
generation_config=generation_config,
input_ids_seq_length=input_ids_seq_length,
encoder_input_ids=input_ids,
prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
logits_processor=logits_processor,
)
stopping_criteria = model._get_stopping_criteria(
generation_config=generation_config, stopping_criteria=stopping_criteria
)
logits_warper = model._get_logits_warper(generation_config)
unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
scores = None
while True:
model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
# forward pass to get next token
outputs = model(
**model_inputs,
return_dict=True,
output_attentions=False,
output_hidden_states=False,
)
next_token_logits = outputs.logits[:, -1, :]
# pre-process distribution
next_token_scores = logits_processor(input_ids, next_token_logits)
next_token_scores = logits_warper(input_ids, next_token_scores)
# sample
probs = nn.functional.softmax(next_token_scores, dim=-1)
if generation_config.do_sample:
next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
else:
next_tokens = torch.argmax(probs, dim=-1)
# update generated ids, model inputs, and length for next step
input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
model_kwargs = model._update_model_kwargs_for_generation(
outputs, model_kwargs, is_encoder_decoder=False
)
unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
output_token_ids = input_ids[0].cpu().tolist()
output_token_ids = output_token_ids[input_length:]
for each_eos_token_id in eos_token_id:
if output_token_ids[-1] == each_eos_token_id:
output_token_ids = output_token_ids[:-1]
response = tokenizer.decode(output_token_ids)
yield response
# stop when each sentence is finished, or if we exceed the maximum length
if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
return
def check_dependency(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
try:
try_to_import_special_deps()
self.info = "依赖检测通过"
self.success = True
except:
self.info = f"缺少{model_name}的依赖,如果要使用{model_name}除了基础的pip依赖以外您还需要运行{cmd_to_install}安装{model_name}的依赖。"
self.success = False
def run(self):
# 🏃‍♂️🏃‍♂️🏃‍♂️ 子进程执行
# 第一次运行,加载参数
try:
self._model, self._tokenizer = self.load_model_and_tokenizer()
except:
from toolbox import trimmed_format_exc
self.child.send(f'[Local Message] 不能正常加载{model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
raise RuntimeError(f"不能正常加载{model_name}的参数!")
while True:
# 进入任务等待状态
kwargs = self.child.recv()
# 收到消息,开始请求
try:
for response_full in self.llm_stream_generator(**kwargs):
self.child.send(response_full)
except:
from toolbox import trimmed_format_exc
self.child.send(f'[Local Message] 调用{model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
# 请求处理结束,开始下一个循环
self.child.send('[Finish]')
def stream_chat(self, **kwargs):
# ⭐主进程执行
self.threadLock.acquire()
self.parent.send(kwargs)
while True:
res = self.parent.recv()
if res != '[Finish]':
yield res
else:
break
self.threadLock.release()
# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic
# ------------------------------------------------------------------------------------------------------------------------
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
"""
⭐多线程方法
函数的说明请见 request_llm/bridge_all.py
"""
_llm_handle = GetInternlmHandle()
if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
if not _llm_handle.success:
error = _llm_handle.info
_llm_handle = None
raise RuntimeError(error)
# chatglm 没有 sys_prompt 接口因此把prompt加入 history
history_feedin = []
history_feedin.append(["What can I do?", sys_prompt])
for i in range(len(history)//2):
history_feedin.append([history[2*i], history[2*i+1]] )
watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
response = ""
for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
if len(observe_window) >= 1: observe_window[0] = response
if len(observe_window) >= 2:
if (time.time()-observe_window[1]) > watch_dog_patience:
raise RuntimeError("程序终止。")
return response
def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
"""
⭐单线程方法
函数的说明请见 request_llm/bridge_all.py
"""
chatbot.append((inputs, ""))
_llm_handle = GetInternlmHandle()
chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info)
yield from update_ui(chatbot=chatbot, history=[])
if not _llm_handle.success:
_llm_handle = None
return
if additional_fn is not None:
import core_functional
importlib.reload(core_functional) # 热更新prompt
core_functional = core_functional.get_core_functions()
if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs) # 获取预处理函数(如果有的话)
inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]
# 处理历史信息
history_feedin = []
history_feedin.append(["What can I do?", system_prompt] )
for i in range(len(history)//2):
history_feedin.append([history[2*i], history[2*i+1]] )
# 开始接收chatglm的回复
response = f"[Local Message]: 等待{model_name}响应中 ..."
for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
chatbot[-1] = (inputs, response)
yield from update_ui(chatbot=chatbot, history=history)
# 总结输出
if response == f"[Local Message]: 等待{model_name}响应中 ...":
response = f"[Local Message]: {model_name}响应异常 ..."
history.extend([inputs, response])
yield from update_ui(chatbot=chatbot, history=history)

View File

@ -447,6 +447,15 @@ class _ChatHub:
"""
Ask a question to the bot
"""
req_header = HEADERS
if self.cookies is not None:
ws_cookies = []
for cookie in self.cookies:
ws_cookies.append(f"{cookie['name']}={cookie['value']}")
req_header.update({
'Cookie': ';'.join(ws_cookies),
})
timeout = aiohttp.ClientTimeout(total=30)
self.session = aiohttp.ClientSession(timeout=timeout)
@ -455,7 +464,7 @@ class _ChatHub:
# Check if websocket is closed
self.wss = await self.session.ws_connect(
wss_link,
headers=HEADERS,
headers=req_header,
ssl=ssl_context,
proxy=self.proxy,
autoping=False,
@ -510,7 +519,11 @@ class _ChatHub:
resp_txt_no_link = ""
while not final:
msg = await self.wss.receive()
try:
objects = msg.data.split(DELIMITER)
except :
continue
for obj in objects:
if obj is None or not obj:
continue

View File

@ -14,7 +14,8 @@ if __name__ == "__main__":
# from request_llm.bridge_moss import predict_no_ui_long_connection
# from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
# from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection
from request_llm.bridge_claude import predict_no_ui_long_connection
# from request_llm.bridge_claude import predict_no_ui_long_connection
from request_llm.bridge_internlm import predict_no_ui_long_connection
llm_kwargs = {
'max_length': 512,
@ -22,45 +23,8 @@ if __name__ == "__main__":
'temperature': 1,
}
result = predict_no_ui_long_connection(inputs="你好",
result = predict_no_ui_long_connection( inputs="请问什么是质子?",
llm_kwargs=llm_kwargs,
history=[],
history=["你好", "我好!"],
sys_prompt="")
print('final result:', result)
# # print(result)
# from multiprocessing import Process, Pipe
# class GetGLMHandle(Process):
# def __init__(self):
# super().__init__(daemon=True)
# pass
# def run(self):
# # 子进程执行
# # 第一次运行,加载参数
# def validate_path():
# import os, sys
# dir_name = os.path.dirname(__file__)
# root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
# os.chdir(root_dir_assume + '/request_llm/jittorllms')
# sys.path.append(root_dir_assume + '/request_llm/jittorllms')
# validate_path() # validate path so you can run from base directory
# jittorllms_model = None
# import types
# try:
# if jittorllms_model is None:
# from models import get_model
# # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
# args_dict = {'model': 'chatrwkv'}
# print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
# jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
# print('done get model')
# except:
# # self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
# raise RuntimeError("不能正常加载jittorllms的参数")
# x = GetGLMHandle()
# x.start()
# input()

View File

@ -18,3 +18,4 @@ openai
numpy
arxiv
rich
pypdf2==2.12.1

View File

@ -60,6 +60,9 @@ def ArgsGeneralWrapper(f):
plugin_kwargs = {
"advanced_arg": plugin_advanced_arg,
}
if "recently_uploaded_files" in cookies:
plugin_kwargs.update({"recently_uploaded_files": cookies["recently_uploaded_files"]})
chatbot_with_cookie = ChatBotWithCookies(cookies)
chatbot_with_cookie.write_list(chatbot)
if cookies.get('lock_plugin', None) is None:
@ -462,6 +465,10 @@ def find_recent_files(directory):
return recent_files
def clear_file_downloadzone(chatbot):
if chatbot:
chatbot._cookies.update({'file_to_promote': []})
def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
# 将文件复制一份到下载区
import shutil
@ -476,13 +483,14 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
if 'file_to_promote' in chatbot._cookies: current = chatbot._cookies['file_to_promote']
else: current = []
chatbot._cookies.update({'file_to_promote': [new_path] + current})
return new_path
def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
def on_file_uploaded(cookies, files, chatbot, txt, txt2, checkboxes):
"""
当文件被上传时的回调函数
"""
if len(files) == 0:
return chatbot, txt
return cookies, chatbot, txt
import shutil
import os
import time
@ -512,7 +520,8 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
f'[Local Message] 收到以下文件: \n\n{moved_files_str}' +
f'\n\n调用路径参数已自动修正到: \n\n{txt}' +
f'\n\n现在您点击任意“红颜色”标识的函数插件时,以上文件将被作为输入参数'+err_msg])
return chatbot, txt, txt2
cookies.update({"recently_uploaded_files": moved_files})
return cookies, chatbot, txt, txt2
def on_report_generated(cookies, files, chatbot):
@ -538,6 +547,10 @@ def load_chat_cookies():
return {'api_key': API_KEY, 'llm_model': LLM_MODEL}
def is_openai_api_key(key):
CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
if len(CUSTOM_API_KEY_PATTERN) != 0:
API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
else:
API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
return bool(API_MATCH_ORIGINAL)
@ -594,7 +607,7 @@ def select_api_key(keys, llm_model):
if is_azure_api_key(k): avail_key_list.append(k)
if len(avail_key_list) == 0:
raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azureapi2d请求源")
raise RuntimeError(f"您提供的api-key不满足要求不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源右下角更换模型菜单中可切换openai,azure,claude,api2d请求源)")
api_key = random.choice(avail_key_list) # 随机负载均衡
return api_key
@ -670,12 +683,12 @@ def read_single_conf_with_lru_cache(arg):
# 在读取API_KEY时检查一下是不是忘了改config
if arg == 'API_KEY':
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和API2D的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,api2d-key3\"")
print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s)也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
if is_any_api_key(r):
print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
else:
print亮红( "[API_KEY] 正确的 API_KEY 'sk'开头的51位密钥OpenAI或者 'fk'开头的41位密钥请在config文件中修改API密钥之后再运行。")
print亮红( "[API_KEY] 的 API_KEY 不满足任何一种已知的密钥格式请在config文件中修改API密钥之后再运行。")
if arg == 'proxies':
if r is None:
print亮红('[PROXY] 网络代理状态未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议检查USE_PROXY选项是否修改。')
@ -685,6 +698,7 @@ def read_single_conf_with_lru_cache(arg):
return r
@lru_cache(maxsize=128)
def get_conf(*args):
# 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
res = []
@ -884,3 +898,15 @@ def objload(file='objdump.tmp'):
with open(file, 'rb') as f:
return pickle.load(f)
def Singleton(cls):
"""
一个单实例装饰器
"""
_instance = {}
def _singleton(*args, **kargs):
if cls not in _instance:
_instance[cls] = cls(*args, **kargs)
return _instance[cls]
return _singleton