增加prompt收集统计

2023-05-08 10:30:23 +08:00
parent 808aecab91
commit 519d0a1f42
7 changed files with 118 additions and 16 deletions
--- a/main.py
+++ b/main.py
@ -56,7 +56,7 @@ class ChatBotFrame:
    def __init__(self):
        self.cancel_handles = []
        self.initial_prompt = "In answer to my question, Think about what are some alternative perspectives"
-        self.title_html = f"<h1 align=\"center\">ChatGPT For Tester {get_current_version()}</h1>"
+        self.title_html = f"<h1 align=\"center\">ksoGPT  {get_current_version()}</h1>"
        self.description = """代码开源和更新[地址🚀](https://github.com/binary-husky/chatgpt_academic)，感谢热情的[开发者们❤️](https://github.com/binary-husky/chatgpt_academic/graphs/contributors)"""


@ -128,7 +128,7 @@ class ChatBot(ChatBotFrame):
        with gr.Tab('Setting'):
            self.top_p = gr.Slider(minimum=-0, maximum=1.0, value=1.0, step=0.01, interactive=True, label="Top-p (nucleus sampling)", )
            self.temperature = gr.Slider(minimum=-0, maximum=2.0, value=1.0, step=0.01, interactive=True, label="Temperature", )
-            self.max_length_sl = gr.Slider(minimum=256, maximum=4096, value=512, step=1, interactive=True, label="MaxLength", )
+            self.max_length_sl = gr.Slider(minimum=256, maximum=4096, value=4096, step=1, interactive=True, label="MaxLength", )
            self.models_box = gr.CheckboxGroup(["input加密"], value=["input加密"], label="对话模式")
            self.system_prompt = gr.Textbox(show_label=True, lines=2, placeholder=f"System Prompt", label="System prompt", value=self.initial_prompt)
            self.md_dropdown = gr.Dropdown(AVAIL_LLM_MODELS, value=LLM_MODEL, label="更换LLM模型/请求源").style(container=False)
@ -229,7 +229,7 @@ class ChatBot(ChatBotFrame):
        import threading, webbrowser, time

        print(f"如果浏览器没有自动打开，请复制并转到以下URL：")
-        print(f"\t（亮色主题）: {self.__url}")
+        print(f"\t（亮色主题）: http://localhost:{PORT}")
        print(f"\t（暗色主题）: {self.__url}/?__dark-theme=true")
    
        def open():
--- a/docs/waifu_plugin/autoload.js
+++ b/docs/waifu_plugin/autoload.js
@ -12,7 +12,7 @@ try {
            live2d_settings['waifuTipsSize']        = '187x52';  
            live2d_settings['canSwitchModel']       = true;
            live2d_settings['canSwitchTextures']    = true;
-            live2d_settings['canSwitchHitokoto']    = false;
+            live2d_settings['canSwitchHitokoto']    = true;
            live2d_settings['canTakeScreenshot']    = false;
            live2d_settings['canTurnToHomePage']    = false;
            live2d_settings['canTurnToAboutPage']   = false;
--- a/docs/waifu_plugin/waifu-tips.json
+++ b/docs/waifu_plugin/waifu-tips.json
@ -34,10 +34,10 @@
            "2": ["来自 Potion Maker 的 Tia 酱 ~"]  
        },
        "hitokoto_api_message": {
-            "lwl12.com": ["这句一言来自 <span style=\"color:#0099cc;\">『{source}』</span>", "，是 <span style=\"color:#0099cc;\">{creator}</span> 投稿的", "。"],
-            "fghrsh.net": ["这句一言出处是 <span style=\"color:#0099cc;\">『{source}』</span>，是 <span style=\"color:#0099cc;\">FGHRSH</span> 在 {date} 收藏的！"],
-            "jinrishici.com": ["这句诗词出自 <span style=\"color:#0099cc;\">《{title}》</span>，是 {dynasty}诗人 {author} 创作的！"],
-            "hitokoto.cn": ["这句一言来自 <span style=\"color:#0099cc;\">『{source}』</span>，是 <span style=\"color:#0099cc;\">{creator}</span> 在 hitokoto.cn 投稿的。"]
+            "lwl12.com": ["这句一言来自 <span style=\"color:#ff99da;\">『{source}』</span>", "，是 <span style=\"color:#ff99da;\">{creator}</span> 投稿的", "。"],
+            "fghrsh.net": ["这句一言出处是 <span style=\"color:#ff99da;\">『{source}』</span>，是 <span style=\"color:#ff99da;\">FGHRSH</span> 在 {date} 收藏的！"],
+            "jinrishici.com": ["这句诗词出自 <span style=\"color:#ff99da;\">《{title}》</span>，是 {dynasty}诗人 {author} 创作的！"],
+            "hitokoto.cn": ["这句一言来自 <span style=\"color:#ff99da;\">『{source}』</span>，是 <span style=\"color:#ff99da;\">{creator}</span> 在 hitokoto.cn 投稿的。"]
        }
    },
    "mouseover": [
--- a/func_box.py
+++ b/func_box.py
@ -12,7 +12,11 @@ import tempfile
 import shutil
 from contextlib import ExitStack
 import logging
+import yaml
 logger = logging
+from sklearn.feature_extraction.text import CountVectorizer
+import numpy as np
+from scipy.linalg import norm
 """contextlib 是 Python 标准库中的一个模块，提供了一些工具函数和装饰器，用于支持编写上下文管理器和处理上下文的常见任务，例如资源管理、异常处理等。
 官网：https://docs.python.org/3/library/contextlib.html"""

@ -123,8 +127,8 @@ def ipaddr():  # 获取本地ipx
            return ip[i][0][1]

 def encryption_str(txt: str):
-    txt = str(txt)
    """(关键字)(加密间隔)匹配机制（关键字间隔）"""
+    txt = str(txt)
    pattern = re.compile(rf"(Authorization|WPS-Sid|Cookie)(:|\s+)\s*(\S+)[\s\S]*?(?=\n|$|\s)", re.IGNORECASE)
    result = pattern.sub(lambda x: x.group(1) + ": XXXXXXXX", txt)
    return result
@ -154,15 +158,65 @@ def chat_history(log: list, split=0):
    return chat, history


+def df_similarity(s1, s2):
+    """Deprecated: character-level cosine similarity between two strings.
+
+    Each string is split into single characters, both are turned into
+    term-frequency vectors with ``CountVectorizer`` and the cosine of the
+    angle between the two vectors is returned.
+
+    Args:
+        s1: First string.
+        s2: Second string.
+
+    Returns:
+        The cosine similarity of the two count vectors, or ``0.0`` when
+        either string contains no non-whitespace character (the cosine is
+        undefined for an all-zero vector).
+    """
+    def add_space(s):
+        return ' '.join(list(s))
+    # Put a space between characters so that every character is one token.
+    s1, s2 = add_space(s1), add_space(s2)
+    # Without this guard an empty side produces a 0/0 division (nan), and two
+    # empty sides make fit_transform fail on an empty vocabulary.
+    if not s1.split() or not s2.split():
+        return 0.0
+    # Build the term-frequency matrix. token_pattern=None tells scikit-learn
+    # that the custom tokenizer is intentional, which silences its
+    # "token_pattern will not be used" warning (presumably the warning the
+    # original note on this function referred to).
+    cv = CountVectorizer(tokenizer=lambda s: s.split(), token_pattern=None)
+    corpus = [s1, s2]
+    vectors = cv.fit_transform(corpus).toarray()
+    # Cosine similarity of the two count vectors.
+    return np.dot(vectors[0], vectors[1]) / (norm(vectors[0]) * norm(vectors[1]))
+
+
+def diff_list(lst: list, percent=0.70):
+    """Group near-duplicate strings and count the members of each group.
+
+    Items are compared with ``difflib.SequenceMatcher``; an item whose
+    similarity ratio to an existing group key is at least ``percent`` joins
+    that group. The longest string seen so far in a group is used as its key.
+
+    Args:
+        lst: Iterable of strings (iterating a dict yields its keys).
+            ``None`` is treated as empty.
+        percent: Minimum ``SequenceMatcher.ratio()`` for two strings to be
+            considered the same group.
+
+    Returns:
+        dict mapping each group's representative string to the number of
+        items that were assigned to that group.
+    """
+    import difflib
+    count_dict = {}
+    for i in lst or ():
+        for key in count_dict:
+            if difflib.SequenceMatcher(None, i, key).ratio() >= percent:
+                if len(i) > len(key):
+                    # The longer text becomes the representative. Merge with
+                    # any count already stored under it instead of
+                    # overwriting that count.
+                    merged = count_dict.pop(key) + 1
+                    count_dict[i] = count_dict.get(i, 0) + merged
+                else:
+                    count_dict[key] += 1
+                # The dict may just have been resized, so stop iterating it.
+                break
+        else:
+            # No existing group was similar enough: start a new one.
+            count_dict[i] = 1
+    return count_dict
+
+
+class YamlHandle:
+    """Small read/modify/write helper around a single YAML mapping file.
+
+    The file is treated as one flat ``{key: value}`` mapping that is loaded
+    completely, changed in memory and written back completely.
+    """
+
+    def __init__(self, file=None):
+        """
+        Args:
+            file: Path of the YAML file. When omitted, ``logs/ai_prompt.yaml``
+                in the directory of this module is used, so the default no
+                longer depends on one developer's home directory.
+        """
+        if file is None:
+            import os
+            file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs', 'ai_prompt.yaml')
+        self.file = file
+
+    def load(self) -> dict:
+        """Return the mapping stored in the file.
+
+        Returns:
+            The parsed YAML document, or an empty dict when the file does not
+            exist yet or is empty.
+        """
+        try:
+            # Explicit UTF-8: update() writes raw (non-escaped) Unicode.
+            with open(file=self.file, mode='r', encoding='utf-8') as f:
+                data = yaml.safe_load(f)
+        except FileNotFoundError:
+            return {}
+        return data if data is not None else {}
+
+    def update(self, key, value):
+        """Set ``key`` to ``value`` in the file and return the whole mapping.
+
+        The parent directory of the file is created when it is missing.
+        """
+        import os
+        data = self.load()
+        if not data:
+            data = {}
+        data[key] = value
+        parent = os.path.dirname(self.file)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        with open(file=self.file, mode='w', encoding='utf-8') as f:
+            yaml.dump(data, f, allow_unicode=True)
+        return data
+
+

 if __name__ == '__main__':

    txt = "Authorization: WPS-2:AqY7ik9XQ92tvO7+NlCRvA==:b2f626f496de9c256605a15985c855a8b3e4be99\nwps-Sid: V02SgISzdeWrYdwvW_xbib-fGlqUIIw00afc5b890008c1976f\nCookie: wpsua=V1BTVUEvMS4wIChhbmRyb2lkLW9mZmljZToxNy41O2FuZHJvaWQ6MTA7ZjIwZDAyNWQzYTM5MmExMDBiYzgxNWI2NmI3Y2E5ODI6ZG1sMmJ5QldNakF5TUVFPSl2aXZvL1YyMDIwQQ=="
    txt = "Authorization: WPS-2:AqY7ik9XQ92tvO7+NlCRvA==:b2f626f496de9c256605a15985c855a8b3e4be99"
-    print(encryption_str(txt))

-    def update_ui(chatbot, history, msg='正常', txt='', obj=None, btn1=None, btn2=None, au_text=None, *args):
-        print(chatbot, history, msg, txt, obj)
+    # print(YamlHandle().update(123123213, 2131231231))

-    ll = [4,5, 6]
-    update_ui(chatbot=1, history=2, *ll)
+    diff_list(YamlHandle().load())
--- a/logs/ai_prompt.yaml
+++ b/logs/ai_prompt.yaml
@ -0,0 +1,16 @@
+你好: 你好！有什么我可以帮您的吗？
+? '我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL，然后请使用Markdown格式封装，并且不要有反斜线，不要用代码块。现在，请按以下描述给我发送图片：
+
+  嘻嘻嘻哈哈哈'
+: ''
+? '我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL，然后请使用Markdown格式封装，并且不要有反斜线，不要用代码块。现在，请按以下描述给我发送图片：
+
+  猫又'
+: ''
+? '我需要你找一张网络图片。使用Unsplash API(https://source.unsplash.com/960x640/?<英语关键词>)获取图片URL，然后请使用Markdown格式封装，并且不要有反斜线，不要用代码块。现在，请按以下描述给我发送图片：
+
+  猫猫1241242142114221214412124412'
+: '我为您找到了一张猫猫的图片，如下：
+
+
+  ![猫猫的图片](https://source.unsplash.com/960x640/?cat)'
--- a/test.py
+++ b/test.py
@ -5,6 +5,7 @@
 # @Descr   :
 import gradio as gr

+import func_box


 class my_class():
@ -49,10 +50,41 @@ class ChatBot():
            self.btn = gr.Button(value="Submit1")
            self.btn2 = gr.Button(value="Submit2", visible=False)
            self.obj = gr.State({'obj': None, 'btn': self.btn, 'btn2': self.btn2})
+            dic = func_box.YamlHandle().load()
+            gr.EventData
+
            self.btn.click(set_obj, inputs=[self.obj], outputs=[self.obj, self.btn, self.btn2])
            self.btn2.click(print_obj, inputs=[self.obj], outputs=[self.txt])
        self.demo.launch()

 if __name__ == '__main__':
-    ChatBot().draw_test()
+    import gradio as gr
+
+
+    def highlight_text(text, highlights):
+        """Wrap every occurrence of each highlight phrase in a clickable span.
+
+        All phrases are substituted in a single pass over ``text``, so a
+        phrase can never match inside the markup generated for another one
+        (e.g. a highlight of "span" or "class"). Where phrases overlap, the
+        longest one wins.
+
+        Args:
+            text: Text to decorate. It is emitted as given (not HTML-escaped).
+            highlights: Iterable of phrases to wrap; empty phrases are ignored.
+
+        Returns:
+            ``text`` with each matched phrase replaced by
+            ``<span class='highlight' onclick='alert(...)'>phrase</span>``.
+        """
+        import html
+        import json
+        import re
+
+        # De-duplicate while keeping order, then try longer phrases first.
+        phrases = sorted(dict.fromkeys(h for h in highlights if h), key=len, reverse=True)
+        if not phrases:
+            return text
+
+        def wrap(match):
+            phrase = match.group(0)
+            # JSON-encode for the JavaScript string literal, then HTML-escape
+            # for the single-quoted attribute, so quotes inside the phrase
+            # cannot break out of either context.
+            js_arg = html.escape(json.dumps(phrase), quote=True)
+            return f"<span class='highlight' onclick='alert({js_arg})'>{phrase}</span>"
+
+        pattern = re.compile("|".join(re.escape(p) for p in phrases))
+        return pattern.sub(wrap, text)
+
+
+    app = gr.Interface(
+        fn=highlight_text,
+        inputs=["text", "highlighted_text"],
+        outputs="html",
+        interpretation="default",
+        examples=[["The quick brown fox jumps over the lazy dog.", ["quick", "brown", "fox", "lazy"]]],
+        layout="unaligned",
+        capture_session=True
+    )
+
+    app.launch()
+
+
+
+
+
+
+
+
+

--- a/toolbox.py
+++ b/toolbox.py
@ -83,7 +83,7 @@ def update_ui(chatbot, history, msg='正常', txt='', *args):  # 刷新界面
    """
    刷新用户界面
    """
-
+    func_box.YamlHandle().update(key=chatbot[-1][0], value=chatbot[-1][1])
    assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时，可用clear将其清空，然后用for+append循环重新赋值。"
    yield chatbot.get_cookies(), chatbot, history, msg, txt