Merge changes from master
@@ -108,6 +108,13 @@ def test_联网回答问题():
        print("当前问答:", cb[-1][-1].replace("\n"," "))
    for i, it in enumerate(cb): print亮蓝(it[0]); print亮黄(it[1])

def test_解析ipynb文件():
    from crazy_functions.解析JupyterNotebook import 解析ipynb文件
    txt = "crazy_functions/test_samples"
    for cookies, cb, hist, msg in 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
        print(cb)


# test_解析一个Python项目()
# test_Latex英文润色()
# test_Markdown中译英()
@@ -116,9 +123,8 @@ def test_联网回答问题():
# test_总结word文档()
# test_下载arxiv论文并翻译摘要()
# test_解析一个Cpp项目()

test_联网回答问题()

# test_联网回答问题()
test_解析ipynb文件()

input("程序完成,回车退出。")
print("退出。")
crazy_functions/解析JupyterNotebook.py (new file, 140 lines)
@@ -0,0 +1,140 @@
from toolbox import update_ui
from toolbox import CatchException, report_execption, write_results_to_file
fast_debug = True


class PaperFileGroup():
    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # count_token
        from request_llm.bridge_all import model_info
        enc = model_info["gpt-3.5-turbo"]['tokenizer']
        def get_token_num(txt): return len(
            enc.encode(txt, disallowed_special=()))
        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """
        将长文本分离开来
        """
        for index, file_content in enumerate(self.file_contents):
            if self.get_token_num(file_content) < max_token_limit:
                self.sp_file_contents.append(file_content)
                self.sp_file_index.append(index)
                self.sp_file_tag.append(self.file_paths[index])
            else:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(
                    file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    self.sp_file_contents.append(segment)
                    self.sp_file_index.append(index)
                    self.sp_file_tag.append(
                        self.file_paths[index] + f".part-{j}.txt")

def parseNotebook(filename, enable_markdown=1):
    import json

    CodeBlocks = []
    with open(filename, 'r', encoding='utf-8', errors='replace') as f:
        notebook = json.load(f)
        for cell in notebook['cells']:
            if cell['cell_type'] == 'code' and cell['source']:
                # remove blank lines
                cell['source'] = [line for line in cell['source'] if line.strip()
                                  != '']
                CodeBlocks.append("".join(cell['source']))
            elif enable_markdown and cell['cell_type'] == 'markdown' and cell['source']:
                cell['source'] = [line for line in cell['source'] if line.strip()
                                  != '']
                CodeBlocks.append("Markdown:"+"".join(cell['source']))

    Code = ""
    for idx, code in enumerate(CodeBlocks):
        Code += f"This is {idx+1}th code block: \n"
        Code += code+"\n"

    return Code

def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    pfg = PaperFileGroup()

    print(file_manifest)
    for fp in file_manifest:
        file_content = parseNotebook(fp, enable_markdown=1)
        pfg.file_paths.append(fp)
        pfg.file_contents.append(file_content)

    # <-------- 拆分过长的IPynb文件 ---------->
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code. " +
                    r"If a block starts with `Markdown`, it is a markdown block of the ipynb file. " +
                    r"Start a new line for each block and number the blocks in Chinese." +
                    f"\n\n{frag}" for frag in pfg.sp_file_contents]
    inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
    sys_prompt_array = ["You are a professional programmer."] * n_split

    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        # max_workers=5, # OpenAI所允许的最大并行过载
        scroller_max_len=80
    )

    # <-------- 整理结果,退出 ---------->
    block_result = " \n".join(gpt_response_collection)
    chatbot.append(("解析的结果如下", block_result))
    history.extend(["解析的结果如下", block_result])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    # <-------- 写入文件,退出 ---------->
    res = write_results_to_file(history)
    chatbot.append(("完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

@CatchException
def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    chatbot.append([
        "函数插件功能?",
        "对IPynb文件进行解析。Contributor: codycjy."])
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面

    history = []  # 清空历史
    import glob
    import os
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "":
            txt = '空空如也的输入栏'
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    if txt.endswith('.ipynb'):
        file_manifest = [txt]
    else:
        file_manifest = [f for f in glob.glob(
            f'{project_folder}/**/*.ipynb', recursive=True)]
    if len(file_manifest) == 0:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )
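
To make the flattened format that ipynb解释 feeds to the model concrete, the sketch below builds a tiny two-cell notebook on disk (a made-up sample, not part of this commit) and runs it through the parseNotebook helper defined above: every cell becomes a numbered block, and markdown cells carry a Markdown: prefix. run_file_split then only splits this string further if it exceeds the 1024-token budget set in ipynb解释.

# Illustration only: a hypothetical two-cell notebook flattened by parseNotebook.
# from crazy_functions.解析JupyterNotebook import parseNotebook  # when run outside the module
import json, os, tempfile

demo = {"cells": [
    {"cell_type": "markdown", "source": ["# Title\n"]},
    {"cell_type": "code", "source": ["print('hi')\n", "\n"]},
]}
demo_path = os.path.join(tempfile.mkdtemp(), "demo.ipynb")  # made-up sample path
with open(demo_path, "w", encoding="utf-8") as f:
    json.dump(demo, f)

print(parseNotebook(demo_path, enable_markdown=1))
# Prints, roughly:
#   This is 1th code block:
#   Markdown:# Title
#   This is 2th code block:
#   print('hi')
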
@@ -11,7 +11,7 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
    history_array = []
    sys_prompt_array = []
    report_part_1 = []

    assert len(file_manifest) <= 512, "源文件太多(超过512个), 请缩减输入文件的数量。或者,您也可以选择删除此行警告,并修改代码拆分file_manifest列表,从而实现分批次处理。"
    ############################## <第一步,逐个文件分析,多线程> ##################################
    for index, fp in enumerate(file_manifest):
@@ -63,10 +63,10 @@ def 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs,
        current_iteration_focus = ', '.join([os.path.relpath(fp, project_folder) for index, fp in enumerate(this_iteration_file_manifest)])
        i_say = f'根据以上分析,对程序的整体功能和构架重新做出概括。然后用一张markdown表格整理每个文件的功能(包括{previous_iteration_files_string})。'
        inputs_show_user = f'根据以上分析,对程序的整体功能和构架重新做出概括,由于输入长度限制,可能需要分组处理,本组文件为 {current_iteration_focus} + 已经汇总的文件组。'
        this_iteration_history = copy.deepcopy(this_iteration_gpt_response_collection)
        this_iteration_history.append(last_iteration_result)
        result = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs=i_say, inputs_show_user=inputs_show_user, llm_kwargs=llm_kwargs, chatbot=chatbot,
            history=this_iteration_history, # 迭代之前的分析
            sys_prompt="你是一个程序架构分析师,正在分析一个项目的源代码。")
        report_part_2.extend([i_say, result])
@@ -222,8 +222,8 @@ def 解析一个Golang项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)


@CatchException
def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    history = []    # 清空历史,以免输入溢出
@@ -243,9 +243,9 @@ def 解析一个Lua项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, syst
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何lua文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)


@CatchException
def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    history = []    # 清空历史,以免输入溢出
@@ -263,4 +263,45 @@ def 解析一个CSharp项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, s
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何CSharp文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)


@CatchException
def 解析任意code项目(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    txt_pattern = plugin_kwargs.get("advanced_arg")
    txt_pattern = txt_pattern.replace(",", ",")
    # 将要匹配的模式(例如: *.c, *.cpp, *.py, config.toml)
    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
    if not pattern_include: pattern_include = ["*"] # 不输入即全部匹配
    # 将要忽略匹配的文件后缀(例如: ^*.c, ^*.cpp, ^*.py)
    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # 避免解析压缩文件
    # 将要忽略匹配的文件名(例如: ^README.md)
    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", "\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
    # 生成正则表达式
    pattern_except = '/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
    pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''

    history.clear()
    import glob, os, re
    if os.path.exists(txt):
        project_folder = txt
    else:
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    # 若上传压缩文件, 先寻找到解压的文件夹路径, 从而避免解析压缩文件
    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
    if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
        extract_folder_path = maybe_dir[0]
    else:
        extract_folder_path = project_folder
    # 按输入的匹配模式寻找上传的非压缩文件和已解压的文件
    file_manifest = [f for pattern in pattern_include for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True) if "" != extract_folder_path and \
                     os.path.isfile(f) and (not re.search(pattern_except, f) or pattern.endswith('.' + re.search(pattern_except, f).group().split('.')[-1]))]
    if len(file_manifest) == 0:
        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
    yield from 解析源代码新(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
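
As a worked example of the pattern handling in 解析任意code项目 above, the sketch below replays the same parsing on a made-up advanced_arg value (the input string is hypothetical): include patterns are taken from the comma-separated tokens, exclusion tokens are picked out of the space-separated tokens, and the result is one regular expression used to filter the glob results.

# Illustration only: replaying the parsing above on a hypothetical advanced_arg.
txt_pattern = "*.py, ^*.c, ^README.md"  # made-up user input
pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz']
pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
pattern_except += r'|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
print(pattern_include)  # ['*.py']
print(pattern_except)   # /[^/]+\.(c|zip|rar|7z|tar|gz)$|/(README\.md)$

Because the exclusion tokens are recovered by splitting on spaces while includes are split on commas, trailing commas on exclusion tokens are stripped afterwards.
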
@@ -25,6 +25,35 @@ def 同时问询(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
        retry_times_at_unknown_error=0
    )

    history.append(txt)
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新


@CatchException
def 同时问询_指定模型(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             输入栏用户输入的文本,例如需要翻译的一段话,再例如一个包含了待处理文件的路径
    llm_kwargs      gpt模型参数,如温度和top_p等,一般原样传递下去就行
    plugin_kwargs   插件模型的参数,如温度和top_p等,一般原样传递下去就行
    chatbot         聊天显示框的句柄,用于显示给用户
    history         聊天历史,前情提要
    system_prompt   给gpt的静默提醒
    web_port        当前软件运行的端口号
    """
    history = []    # 清空历史,以免输入溢出
    chatbot.append((txt, "正在同时咨询ChatGPT和ChatGLM……"))
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 由于请求gpt需要一段时间,我们先及时地做一次界面更新

    # llm_kwargs['llm_model'] = 'chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
    llm_kwargs['llm_model'] = plugin_kwargs.get("advanced_arg", 'chatglm&gpt-3.5-turbo') # 'chatglm&gpt-3.5-turbo' # 支持任意数量的llm接口,用&符号分隔
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=txt, inputs_show_user=txt,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt=system_prompt,
        retry_times_at_unknown_error=0
    )

    history.append(txt)
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 # 界面更新
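
For 同时问询_指定模型, advanced_arg is simply the '&'-joined list of model interfaces to query at once, as the commented-out line above shows. A minimal sketch of how a caller would select three backends (the plugin_kwargs dict shape matches the .get() call above; everything else is an assumption):

# Illustration only: choosing backends via advanced_arg for 同时问询_指定模型.
plugin_kwargs = {"advanced_arg": "chatglm&gpt-3.5-turbo&api2d-gpt-3.5-turbo"}
llm_model = plugin_kwargs.get("advanced_arg", 'chatglm&gpt-3.5-turbo')
print(llm_model.split('&'))  # ['chatglm', 'gpt-3.5-turbo', 'api2d-gpt-3.5-turbo']: one model interface per entry
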