3d interp

2023-07-25 10:09:50 +08:00
parent a8093b9dd8
commit fb22f716ff
4 changed files with 153 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -151,3 +151,4 @@ multi-language
 request_llm/moss
 media
 flagged
+objdump.tmp
--- a/crazy_functions/crazy_functions_test.py
+++ b/crazy_functions/crazy_functions_test.py
@ -237,6 +237,14 @@ def test_虚空终端CodeInterpreter():
        cli_printer.print(cb)


+def test_解析项目源代码炫酷版():
+    """Run the 解析一个Python项目炫酷版 plugin on the current directory and print every chatbot update."""
+    from crazy_functions.解析项目源代码炫酷版 import 解析一个Python项目炫酷版
+
+    project_path = './'
+    plugin_generator = 解析一个Python项目炫酷版(
+        project_path, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port
+    )
+    # Each yielded item is unpacked as a 4-tuple; only the chatbot element is printed.
+    for _cookies, chatbot_state, _history, _msg in plugin_generator:
+        cli_printer.print(chatbot_state)
+
+
 if __name__ == "__main__":
    # test_解析一个Python项目()
    # test_Latex英文润色()
@ -253,6 +261,7 @@ if __name__ == "__main__":
    # test_Langchain知识库读取()
    # test_Latex()
    # test_chatglm_finetune()
-    test_虚空终端CodeInterpreter()
+    # test_虚空终端CodeInterpreter()
+    test_解析项目源代码炫酷版()
    input("程序完成，回车退出。")
    print("退出。")
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@ -141,7 +141,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        chatbot, history_array, sys_prompt_array, 
        refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
        handle_token_exceed=True, show_user_at_complete=False,
-        retry_times_at_unknown_error=2,
+        retry_times_at_unknown_error=2, callback_fn=None
        ):
    """
    Request GPT model using multiple threads with UI and high efficiency
@ -166,6 +166,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        handle_token_exceed：是否自动处理token溢出的情况，如果选择自动处理，则会在溢出时暴力截断，默认开启
        show_user_at_complete (bool, optional): (在结束时，把完整输入-输出结果显示在聊天框)
        retry_times_at_unknown_error：子任务失败时的重试次数
+        callback_fn: 当信息更新时，在主进程调用的回调函数

    输出 Returns:
        list: List of GPT model responses （每个子任务的输出汇总，如果某个子任务出错，response中会携带traceback报错信息，方便调试和定位问题。）
@ -283,6 +284,9 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        # 在前端打印些好玩的东西
        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始，完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
        yield from update_ui(chatbot=chatbot, history=[]) # 刷新界面
+        # 回调函数
+        if callback_fn is not None: callback_fn([mutable[thread_index][0] for thread_index in range(len(futures))])
+        # 结束了吗？
        if all(worker_done):
            executor.shutdown()
            break
--- a/crazy_functions/解析项目源代码炫酷版.py
+++ b/crazy_functions/解析项目源代码炫酷版.py
@ -0,0 +1,137 @@
+from toolbox import update_ui
+from toolbox import CatchException, report_execption, write_results_to_file
+from toolbox import objdump, objload
+from .crazy_utils import input_clipping
+
+class ThreeJSPlot():
+    """Holder for a file list and a vhmap rendering bridge.
+
+    The bridge is only created by launch_render_interface(); the vhmap
+    import lives inside that method, so it is not needed until then.
+    """
+
+    def __init__(self) -> None:
+        # Files handed over through read_files(); None until then.
+        self.files = None
+        # vhmap `mcom` instance; None until launch_render_interface() runs,
+        # so the attribute can be inspected safely before the bridge exists.
+        self.visual_bridge = None
+
+    def read_files(self, files):
+        """Store `files` on the instance."""
+        self.files = files
+
+    def launch_render_interface(self):
+        """Import vhmap and create the `mcom` bridge object."""
+        from vhmap.mcom import mcom
+        self.visual_bridge = mcom()
+
+
+
+def 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
+    """Request a per-file overview from the LLM for every file in `file_manifest`.
+
+    This is a generator: it re-yields whatever the multi-thread request helper
+    and `update_ui` yield, and returns nothing.
+
+    Args:
+        file_manifest: paths of the source files to analyse (at most 512).
+        project_folder: base folder; file names in the prompt are made relative to it.
+        llm_kwargs: forwarded unchanged to the request helper.
+        plugin_kwargs: accepted for signature compatibility; not used here.
+        chatbot: chat message list; progress entries are appended to it.
+        history: accepted for signature compatibility; not used here.
+        system_prompt: accepted for signature compatibility; not used here.
+
+    Raises:
+        AssertionError: if more than 512 files are supplied.
+    """
+    import os, copy
+    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
+
+    assert len(file_manifest) <= 512, "源文件太多（超过512个）, 请缩减输入文件的数量。或者，您也可以选择删除此行警告，并修改代码拆分file_manifest列表，从而实现分批次处理。"
+    ############################## <Step 1: analyse each file, multi-threaded> ##################################
+    inputs_array = []
+    inputs_show_user_array = []
+    for index, fp in enumerate(file_manifest):
+        # Read the file; undecodable bytes are replaced rather than raising.
+        with open(fp, 'r', encoding='utf-8', errors='replace') as f:
+            file_content = f.read()
+        # Only the very first request carries the introductory sentence.
+        prefix = "接下来请你逐文件分析下面的工程" if index==0 else ""
+        i_say = prefix + f'请对下面的程序文件做一个概述文件名是{os.path.relpath(fp, project_folder)}，文件代码是 ```{file_content}```'
+        i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的程序文件做一个概述: {os.path.abspath(fp)}'
+        # Queue the request payload.
+        inputs_array.append(i_say)
+        inputs_show_user_array.append(i_say_show_user)
+    # Every request starts with an empty history and the same system prompt.
+    history_array = [[] for _ in file_manifest]
+    sys_prompt_array = ["你是一个程序架构分析师，正在分析一个源代码项目。你的回答必须简单明了。" for _ in file_manifest]
+
+    def callback_when_intel_update(gpt_reply_array):
+        # Hand the latest replies plus the manifest to toolbox.objdump.
+        # NOTE(review): presumably this persists a snapshot to objdump.tmp — confirm in toolbox.
+        objdump((gpt_reply_array, file_manifest))
+
+    # All files are loaded: spawn one request thread per source file.
+    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
+        inputs_array = inputs_array,
+        inputs_show_user_array = inputs_show_user_array,
+        history_array = history_array,
+        sys_prompt_array = sys_prompt_array,
+        llm_kwargs = llm_kwargs,
+        chatbot = chatbot,
+        show_user_at_complete = True,
+        callback_fn=callback_when_intel_update,
+    )
+
+    # Per-file analysis finished: write the results out before summarising.
+    history_to_return = copy.deepcopy(gpt_response_collection)
+    res = write_results_to_file(history_to_return)
+    chatbot.append(("完成？", "逐个文件分析已完成。" + res + "\n\n正在开始汇总。"))
+    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh the UI
+    ############################## <Step 2: summary, single-threaded, grouped + iterative> ##################################
+    # (not implemented yet)
+
+    ############################## <END> ##################################
+    res = write_results_to_file(history_to_return)
+    chatbot.append(("完成了吗？", res))
+    yield from update_ui(chatbot=chatbot, history=history_to_return) # refresh the UI
+
+
+@CatchException
+def 解析一个Python项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Plugin entry point: analyse every ``*.py`` file found recursively under `txt`.
+
+    Reports an error in the chat and stops when `txt` does not exist or
+    contains no Python files; otherwise delegates to 解析源代码炫酷版.
+    """
+    import glob, os
+    history = []    # start from an empty history so the input does not overflow
+
+    # Guard: the path must exist.
+    if not os.path.exists(txt):
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    project_folder = txt
+
+    # Guard: at least one Python file must be found.
+    file_manifest = glob.glob(f'{project_folder}/**/*.py', recursive=True)
+    if not file_manifest:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何python文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+
+    yield from 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
+
+
+
+@CatchException
+def 解析任意code项目炫酷版(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
+    """Plugin entry point: analyse files under `txt` selected by include/exclude patterns.
+
+    The patterns come from ``plugin_kwargs["advanced_arg"]``:
+      * comma-separated entries not starting with ``^`` are glob include patterns;
+      * space-separated entries starting with ``^*.`` exclude a file suffix;
+      * other space-separated entries starting with ``^`` exclude a file name.
+    Archive suffixes (zip, rar, 7z, tar, gz) are always excluded.
+
+    Reports an error in the chat and stops when `txt` does not exist or no
+    file matches; otherwise delegates to 解析源代码炫酷版.
+    """
+    import glob, os, re
+
+    # A missing "advanced_arg" is treated as an empty pattern (match everything)
+    # instead of failing on None.replace(...).
+    txt_pattern = plugin_kwargs.get("advanced_arg") or ""
+    txt_pattern = txt_pattern.replace("，", ",")
+    # Patterns to include (e.g. *.c, *.cpp, *.py, config.toml)
+    pattern_include = [_.lstrip(" ,").rstrip(" ,") for _ in txt_pattern.split(",") if _ != "" and not _.strip().startswith("^")]
+    if not pattern_include: pattern_include = ["*"] # no input means match everything
+    # File suffixes to exclude (e.g. ^*.c, ^*.cpp, ^*.py)
+    pattern_except_suffix = [_.lstrip(" ^*.,").rstrip(" ,") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^*.")]
+    pattern_except_suffix += ['zip', 'rar', '7z', 'tar', 'gz'] # never analyse archives
+    # File names to exclude (e.g. ^README.md); dots are escaped for the regex
+    pattern_except_name = [_.lstrip(" ^*,").rstrip(" ,").replace(".", r"\.") for _ in txt_pattern.split(" ") if _ != "" and _.strip().startswith("^") and not _.strip().startswith("^*.")]
+    # Build the exclusion regex (raw strings avoid invalid-escape warnings)
+    pattern_except = r'/[^/]+\.(' + "|".join(pattern_except_suffix) + ')$'
+    pattern_except += '|/(' + "|".join(pattern_except_name) + ')$' if pattern_except_name != [] else ''
+    except_regex = re.compile(pattern_except)
+
+    history.clear()
+    if os.path.exists(txt):
+        project_folder = txt
+    else:
+        if txt == "": txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    # If an archive was uploaded, prefer its extraction folder so the archive itself is not parsed
+    maybe_dir = [f for f in glob.glob(f'{project_folder}/*') if os.path.isdir(f)]
+    if len(maybe_dir)>0 and maybe_dir[0].endswith('.extract'):
+        extract_folder_path = maybe_dir[0]
+    else:
+        extract_folder_path = project_folder
+    # Collect uploaded plain files and extracted files that match the include patterns
+    file_manifest = []
+    for pattern in pattern_include:
+        for f in glob.glob(f'{extract_folder_path}/**/{pattern}', recursive=True):
+            if "" == extract_folder_path or not os.path.isfile(f):
+                continue
+            excluded = except_regex.search(f)
+            # A file hit by the exclusion regex is kept anyway when the include
+            # pattern itself ends with that file's extension.
+            if excluded is None or pattern.endswith('.' + excluded.group().split('.')[-1]):
+                file_manifest.append(f)
+    if len(file_manifest) == 0:
+        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何文件: {txt}")
+        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
+        return
+    # Delegate to the analyser defined in this module; the previous call
+    # targeted a name that does not exist in this file.
+    yield from 解析源代码炫酷版(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)