合并master修改

This commit is contained in:
w_xiaolizu
2023-04-23 10:04:32 +08:00
16 changed files with 435 additions and 59 deletions

View File

@ -108,6 +108,13 @@ def test_联网回答问题():
print("当前问答:", cb[-1][-1].replace("\n"," "))
for i, it in enumerate(cb): print亮蓝(it[0]); print亮黄(it[1])
def test_解析ipynb文件():
from crazy_functions.解析JupyterNotebook import 解析ipynb文件
txt = "crazy_functions/test_samples"
for cookies, cb, hist, msg in 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
print(cb)
# test_解析一个Python项目()
# test_Latex英文润色()
# test_Markdown中译英()
@ -116,9 +123,8 @@ def test_联网回答问题():
# test_总结word文档()
# test_下载arxiv论文并翻译摘要()
# test_解析一个Cpp项目()
test_联网回答问题()
# test_联网回答问题()
test_解析ipynb文件()
input("程序完成,回车退出。")
print("退出。")

View File

@ -0,0 +1,140 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
fast_debug = True
class PaperFileGroup():
def __init__(self):
self.file_paths = []
self.file_contents = []
self.sp_file_contents = []
self.sp_file_index = []
self.sp_file_tag = []
# count_token
from request_llm.bridge_all import model_info
enc = model_info["gpt-3.5-turbo"]['tokenizer']
def get_token_num(txt): return len(
enc.encode(txt, disallowed_special=()))
self.get_token_num = get_token_num
def run_file_split(self, max_token_limit=1900):
"""
将长文本分离开来
"""
for index, file_content in enumerate(self.file_contents):
if self.get_token_num(file_content) < max_token_limit:
self.sp_file_contents.append(file_content)
self.sp_file_index.append(index)
self.sp_file_tag.append(self.file_paths[index])
else:
from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
segments = breakdown_txt_to_satisfy_token_limit_for_pdf(
file_content, self.get_token_num, max_token_limit)
for j, segment in enumerate(segments):
self.sp_file_contents.append(segment)
self.sp_file_index.append(index)
self.sp_file_tag.append(
self.file_paths[index] + f".part-{j}.txt")
def parseNotebook(filename, enable_markdown=1):
import json
CodeBlocks = []
with open(filename, 'r', encoding='utf-8', errors='replace') as f:
notebook = json.load(f)
for cell in notebook['cells']:
if cell['cell_type'] == 'code' and cell['source']:
# remove blank lines
cell['source'] = [line for line in cell['source'] if line.strip()
!= '']
CodeBlocks.append("".join(cell['source']))
elif enable_markdown and cell['cell_type'] == 'markdown' and cell['source']:
cell['source'] = [line for line in cell['source'] if line.strip()
!= '']
CodeBlocks.append("Markdown:"+"".join(cell['source']))
Code = ""
for idx, code in enumerate(CodeBlocks):
Code += f"This is {idx+1}th code block: \n"
Code += code+"\n"
return Code
def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
pfg = PaperFileGroup()
print(file_manifest)
for fp in file_manifest:
file_content = parseNotebook(fp, enable_markdown=1)
pfg.file_paths.append(fp)
pfg.file_contents.append(file_content)
# <-------- 拆分过长的IPynb文件 ---------->
pfg.run_file_split(max_token_limit=1024)
n_split = len(pfg.sp_file_contents)
inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
r"If a block starts with `Markdown` which means it's a markdown block in ipynbipynb. " +
r"Start a new line for a block and block num use Chinese." +
f"\n\n{frag}" for frag in pfg.sp_file_contents]
inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
sys_prompt_array = ["You are a professional programmer."] * n_split
gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
inputs_array=inputs_array,
inputs_show_user_array=inputs_show_user_array,
llm_kwargs=llm_kwargs,
chatbot=chatbot,
history_array=[[""] for _ in range(n_split)],
sys_prompt_array=sys_prompt_array,
# max_workers=5, # OpenAI所允许的最大并行过载
scroller_max_len=80
)
# <-------- 整理结果,退出 ---------->
block_result = " \n".join(gpt_response_collection)
chatbot.append(("解析的结果如下", block_result))
history.extend(["解析的结果如下", block_result])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
# <-------- 写入文件,退出 ---------->
res = write_results_to_file(history)
chatbot.append(("完成了吗?", res))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
chatbot.append([
"函数插件功能?",
"对IPynb文件进行解析。Contributor: codycjy."])
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
history = [] # 清空历史
import glob
import os
if os.path.exists(txt):
project_folder = txt
else:
if txt == "":
txt = '空空如也的输入栏'
report_execption(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
if txt.endswith('.ipynb'):
file_manifest = [txt]
else:
file_manifest = [f for f in glob.glob(
f'{project_folder}/**/*.ipynb', recursive=True)]
if len(file_manifest) == 0:
report_execption(chatbot, history,
a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )

View File

@ -11,7 +11,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
history_array = []
sys_prompt_array = []
report_part_1 = []
assert len(file_manifest) <= 512, "源文件太多超过512个, 请缩减输入文件的数量。或者您也可以选择删除此行警告并修改代码拆分file_manifest列表从而实现分批次处理。"
############################## <第一步,逐个文件分析,多线程> ##################################
for index, fp in enumerate(file_manifest):
@ -63,10 +63,10 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
i_say = f'根据以上分析对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能包括{previous_iteration_files_string})。'
inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
this_iteration_history.append(last_iteration_result)
result = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
history=this_iteration_history, # 迭代之前的分析
sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。")
report_part_2.extend([i_say, result])
@ -222,8 +222,8 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
@ -243,9 +243,9 @@ def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
history = [] # 清空历史,以免输入溢出
@ -263,4 +263,45 @@ def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
txt_pattern = plugin_kwargs.get("advanced_arg")
txt_pattern = txt_pattern.replace("", ",")
# 将要匹配的模式(例如: *.c, *.cpp, *.py, config.toml)
pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
if not pattern_include: pattern_include = ["*"] # 不输入即全部匹配
# 将要忽略匹配的文件后缀(例如: ^*.c, ^*.cpp, ^*.py)
pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # 避免解析压缩文件
# 将要忽略匹配的文件名(例如: ^README.md)
pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", "\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
# 生成正则表达式
pattern_except = '/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
history.clear()
import glob, os, re
if os.path.exists(txt):
project_folder = txt
else:
if txt == "": txt = '空空如也的输入栏'
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
# 若上传压缩文件, 先寻找到解压的文件夹路径, 从而避免解析压缩文件
maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
extract_folder_path = maybe_dir[0]
else:
extract_folder_path = project_folder
# 按输入的匹配模式寻找上传的非压缩文件和已解压的文件
file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
if len(file_manifest) == 0:
report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
return
yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)

View File

@ -25,6 +25,35 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
retry_times_at_unknown_error=0
)
history.append(txt)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
@CatchException
def 同时问询_指定模型(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
"""
txt 输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
llm_kwargs gpt模型参数如温度和top_p等一般原样传递下去就行
plugin_kwargs 插件模型的参数如温度和top_p等一般原样传递下去就行
chatbot 聊天显示框的句柄,用于显示给用户
history 聊天历史,前情提要
system_prompt 给gpt的静默提醒
web_port 当前软件运行的端口号
"""
history = [] # 清空历史,以免输入溢出
chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间我们先及时地做一次界面更新
# llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
llm_kwargs['llm_model'] = plugin_kwargs.get("advanced_arg", 'chatglm&gpt-3.5-turbo') # 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口用&符号分隔
gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
inputs=txt, inputs_show_user=txt,
llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
sys_prompt=system_prompt,
retry_times_at_unknown_error=0
)
history.append(txt)
history.append(gpt_say)
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新