优化搜索prompt速度

2023-06-06 16:47:11 +08:00
parent d818ba1be2
commit 3dcf3cf981
4 changed files with 18 additions and 7 deletions
--- a/main.py
+++ b/main.py
@@ -101,7 +101,7 @@ class ChatBot(ChatBotFrame):
        with gr.Box():
            with gr.Row():
                with gr.Column(scale=100):
-                    self.pro_results = gr.Chatbot(label='Prompt and result').style(height=422)
+                    self.pro_results = gr.Chatbot(label='Prompt and result', elem_id='prompt_result').style()
                with gr.Column(scale=10):
                    Tips = "用 BORF 分析法设计chat GPT prompt:\n" \
                           "1、阐述背景 B(Background): 说明背景，为chatGPT提供充足的信息\n" \
--- a/func_box.py
+++ b/func_box.py
@@ -10,6 +10,8 @@ import os.path
 import subprocess
 import threading
 import time
 from concurrent.futures import ThreadPoolExecutor
 import Levenshtein
 import psutil
 import re
 import tempfile
@@ -284,25 +286,30 @@ def diff_list(txt='', percent=0.70, switch: list = None, lst: list = None, sp=15
    Returns:
        返回一个列表
    """
    import difflib
    count_dict = {}
    if not lst:
        lst = SqliteHandle('ai_common').get_prompt_value(txt)
        lst.update(SqliteHandle(f"ai_private_{hosts}").get_prompt_value(txt))
    # diff 数据，根据precent系数归类数据
-    for i in lst:
+    str_ = time.time()
    def tf_factor_calcul(i):
        found = False
-        for key in count_dict.keys():
+        dict_copy = count_dict.copy()
-            str_tf = difflib.SequenceMatcher(None, i, key).ratio()
+        for key in dict_copy.keys():
            str_tf = Levenshtein.jaro_winkler(i, key)
            if str_tf >= percent:
                if len(i) > len(key):
-                    count_dict[i] = count_dict[key] + 1
+                    count_dict[i] = count_dict.copy()[key] + 1
                    count_dict.pop(key)
                else:
                    count_dict[key] += 1
                found = True
                break
        if not found: count_dict[i] = 1
    with ThreadPoolExecutor(1000) as executor:
        executor.map(tf_factor_calcul, lst)
    print('计算耗时', time.time()-str_)
    sorted_dict = sorted(count_dict.items(), key=lambda x: x[1], reverse=True)
    if switch:
        sorted_dict += prompt_retrieval(is_all=switch, hosts=hosts, search=True)
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,3 +22,4 @@ psutil
 distro
 python-dotenv
 rich
 Levenshtein
--- a/theme.py
+++ b/theme.py
@@ -139,6 +139,10 @@ textarea {
    /* overflow: auto !important; */
    z-index: 2;
 }
 #prompt_result{
    height: 50vh !important;
    max-height: 50vh !important;
 }
 .wrap.svelte-18telvq.svelte-18telvq {
    padding: var(--block-padding) !important;
    height: 100% !important;
@@ -153,7 +157,6 @@ textarea {
    width: 100%;
    height: 100%;
 }
 .markdown-body table {
    margin: 1em 0;
    border-collapse: collapse;