no stream test

Merge branch 'master' into interface-interlm
edit default configuration
2023-07-20 12:11:22 +08:00 · 2023-07-20 11:40:45 +08:00 · 2023-07-20 11:39:35 +08:00 · 2023-07-20 11:09:22 +08:00 · 2023-07-20 10:12:42 +08:00 · 2023-07-19 10:15:15 +08:00
9 changed files with 416 additions and 92 deletions
--- a/README.md
+++ b/README.md
@ -44,7 +44,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
 [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL，让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
 互联网信息聚合+GPT | [函数插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题，让信息永不过时
-⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，目前最好的论文翻译工具
+⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/)，目前最好的论文翻译工具
 ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [函数插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/)，自动断句，自动寻找回答时机
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png)，支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt，一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
--- a/config.py
+++ b/config.py
@ -32,9 +32,9 @@ else:

 # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------

-# 重新URL重新定向，实现更换API_URL的作用（常规情况下，不要修改!! 高危设置！通过修改此设置，您将把您的API-KEY和对话隐私完全暴露给您设定的中间人！）
-# 格式 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"} 
-# 例如 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":"https://reverse-proxy-url/v1/chat/completions"}
+# URL重定向，实现更换API_URL的作用（高危设置! 常规情况下不要修改! 通过修改此设置，您将把您的API-KEY和对话隐私完全暴露给您设定的中间人！）
+# 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"} 
+# 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
 API_URL_REDIRECT = {}


@ -71,7 +71,7 @@ MAX_RETRY = 2
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]


 # ChatGLM(2) Finetune Model Path （如果使用ChatGLM2微调模型，需要把"chatglmft"加入AVAIL_LLM_MODELS中）
--- a/main.py
+++ b/main.py
@ -22,8 +22,10 @@ def main():
    # 问询记录, python 版本建议3.9+（越新越好）
    import logging, uuid
    os.makedirs("gpt_log", exist_ok=True)
-    try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
-    except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
+    try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO,  format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    # Disable logging output from the 'httpx' logger
+    logging.getLogger("httpx").setLevel(logging.WARNING)
    print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦！")

    # 一些普通功能模块
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@ -248,7 +248,6 @@ if "moss" in AVAIL_LLM_MODELS:
 if "stack-claude" in AVAIL_LLM_MODELS:
    from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
    from .bridge_stackclaude import predict as claude_ui
-    # claude
    model_info.update({
        "stack-claude": {
            "fn_with_ui": claude_ui,
@ -263,7 +262,6 @@ if "newbing-free" in AVAIL_LLM_MODELS:
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
-        # claude
        model_info.update({
            "newbing-free": {
                "fn_with_ui": newbingfree_ui,
@ -280,7 +278,6 @@ if "newbing" in AVAIL_LLM_MODELS:   # same with newbing-free
    try:
        from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
        from .bridge_newbingfree import predict as newbingfree_ui
-        # claude
        model_info.update({
            "newbing": {
                "fn_with_ui": newbingfree_ui,
@ -297,7 +294,6 @@ if "chatglmft" in AVAIL_LLM_MODELS:   # same with newbing-free
    try:
        from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
        from .bridge_chatglmft import predict as chatglmft_ui
-        # claude
        model_info.update({
            "chatglmft": {
                "fn_with_ui": chatglmft_ui,
@ -310,7 +306,22 @@ if "chatglmft" in AVAIL_LLM_MODELS:   # same with newbing-free
        })
    except:
        print(trimmed_format_exc())
-
+# Register the local InternLM bridge when "internlm" is listed in AVAIL_LLM_MODELS.
+if "internlm" in AVAIL_LLM_MODELS:
+    try:
+        # Imported lazily so the bridge module is only loaded when the model is enabled.
+        from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
+        from .bridge_internlm import predict as internlm_ui
+        model_info.update({
+            "internlm": {
+                "fn_with_ui": internlm_ui,
+                "fn_without_ui": internlm_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                # NOTE(review): token counting reuses the gpt-3.5 tokenizer helpers;
+                # presumably an approximation for InternLM -- confirm.
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        # Any failure above is printed and the "internlm" entry is simply not registered.
+        print(trimmed_format_exc())

 def LLM_CATCH_EXCEPTION(f):
    """
--- a/request_llm/bridge_chatgpt.py
+++ b/request_llm/bridge_chatgpt.py
@ -141,7 +141,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面

    try:
-        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
+        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream=False)
    except RuntimeError as e:
        chatbot[-1] = (inputs, f"您提供的api-key不满足要求，不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
        yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
@ -156,7 +156,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
            from .bridge_all import model_info
            endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
            response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
+                                    json=payload, stream=False, timeout=TIMEOUT_SECONDS);break
        except:
            retry += 1
            chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
@ -174,11 +174,18 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                chunk = next(stream_response)
            except StopIteration:
                # 非OpenAI官方接口的出现这样的报错，OpenAI和API2D不会走这里
-                from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
-                chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk.decode())}")
-                yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk.decode()) # 刷新界面
-                return
-            
+                chunk_decoded = chunk.decode()
+                if '"finish_reason":"stop"' in chunk_decoded:
+                    history[-1] = json.loads(chunk_decoded)['choices'][0]['message']['content']
+                    chatbot[-1] = (history[-2], history[-1])
+                    yield from update_ui(chatbot=chatbot, history=history, msg='OK') # 刷新界面
+                    return
+                else:
+                    error_msg = chunk_decoded
+                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
+                    yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
+                    return
+                
            # print(chunk.decode()[6:])
            if is_head_of_the_stream and (r'"object":"error"' not in chunk.decode()):
                # 数据流的第一帧不携带content
@ -187,7 +194,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
            if chunk:
                try:
                    chunk_decoded = chunk.decode()
-                    # 前者API2D的
+                    # 前者是API2D的结束条件，后者是OPENAI的结束条件
                    if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
                        # 判定为数据流的结束，gpt_replying_buffer也写完了
                        logging.info(f'[response] {gpt_replying_buffer}')
@ -200,41 +207,45 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                    history[-1] = gpt_replying_buffer
                    chatbot[-1] = (history[-2], history[-1])
                    yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
-
                except Exception as e:
-                    traceback.print_exc()
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
                    chunk = get_full_error(chunk, stream_response)
                    chunk_decoded = chunk.decode()
                    error_msg = chunk_decoded
-                    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
-                    if "reduce the length" in error_msg:
-                        if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入：history[-2] 是本次输入, history[-1] 是本次输出
-                        history = clip_history(inputs=inputs, history=history, tokenizer=model_info[llm_kwargs['llm_model']]['tokenizer'], 
-                                               max_token_limit=(model_info[llm_kwargs['llm_model']]['max_token'])) # history至少释放二分之一
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
-                        # history = []    # 清除历史
-                    elif "does not exist" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
-                    elif "Incorrect API key" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website)
-                    elif "exceeded your current quota" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website)
-                    elif "account is not active" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website)
-                    elif "associated with a deactivated account" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website)
-                    elif "bad forward key" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Bad forward key. API2D账户额度不足.")
-                    elif "Not enough point" in error_msg:
-                        chatbot[-1] = (chatbot[-1][0], "[Local Message] Not enough point. API2D账户点数不足.")
-                    else:
-                        from toolbox import regular_txt_to_markdown
-                        tb_str = '```\n' + trimmed_format_exc() + '```'
-                        chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
+                    chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
                    yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
+                    print(error_msg)
                    return

+def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
+    """
+    Turn a raw error payload from the remote endpoint into a chat message.
+
+    The last chatbot entry keeps its user side and gets a "[Local Message] ..."
+    reply chosen by the first known substring found in `error_msg`. When the
+    error is "reduce the length", the last two history items are blanked and the
+    history is shortened with `clip_history`. Unknown errors are shown together
+    with the current traceback and the decoded chunk.
+
+    Returns the (chatbot, history) pair; `history` may be a new object after clipping.
+    """
+    from .bridge_all import model_info
+    openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
+
+    def reply(text):
+        # Replace only the bot side of the most recent chat entry.
+        chatbot[-1] = (chatbot[-1][0], text)
+
+    # Context overflow: drop the current turn from history, then clip what is left.
+    if "reduce the length" in error_msg:
+        if len(history) >= 2:
+            # history[-2] is the current input, history[-1] is the current output
+            history[-1] = ""
+            history[-2] = ""
+        model_entry_name = llm_kwargs['llm_model']
+        history = clip_history(
+            inputs=inputs,
+            history=history,
+            tokenizer=model_info[model_entry_name]['tokenizer'],
+            max_token_limit=(model_info[model_entry_name]['max_token']),
+        )
+        reply("[Local Message] Reduce the length. 本次输入过长, 或历史数据过长. 历史缓存数据已部分释放, 您可以请再次尝试. (若再次失败则更可能是因为输入过长.)")
+        return chatbot, history
+
+    if "does not exist" in error_msg:
+        reply(f"[Local Message] Model {llm_kwargs['llm_model']} does not exist. 模型不存在, 或者您没有获得体验资格.")
+        return chatbot, history
+
+    # Remaining known errors map a substring to a fixed reply; order matters.
+    fixed_replies = (
+        ("Incorrect API key", "[Local Message] Incorrect API key. OpenAI以提供了不正确的API_KEY为由, 拒绝服务. " + openai_website),
+        ("exceeded your current quota", "[Local Message] You exceeded your current quota. OpenAI以账户额度不足为由, 拒绝服务." + openai_website),
+        ("account is not active", "[Local Message] Your account is not active. OpenAI以账户失效为由, 拒绝服务." + openai_website),
+        ("associated with a deactivated account", "[Local Message] You are associated with a deactivated account. OpenAI以账户失效为由, 拒绝服务." + openai_website),
+        ("bad forward key", "[Local Message] Bad forward key. API2D账户额度不足."),
+        ("Not enough point", "[Local Message] Not enough point. API2D账户点数不足."),
+    )
+    for needle, text in fixed_replies:
+        if needle in error_msg:
+            reply(text)
+            return chatbot, history
+
+    # Unknown error: show the traceback plus the raw payload rendered as markdown.
+    from toolbox import regular_txt_to_markdown
+    tb_str = '```\n' + trimmed_format_exc() + '```'
+    reply(f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
+    return chatbot, history
+
+
 def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
    """
    整合所有信息，选择LLM模型，生成http请求，为发送请求做准备
--- a/request_llm/bridge_internlm.py
+++ b/request_llm/bridge_internlm.py
@ -0,0 +1,315 @@
+
+from transformers import AutoModel, AutoTokenizer
+import time
+import threading
+import importlib
+from toolbox import update_ui, get_conf, Singleton
+from multiprocessing import Process, Pipe
+
+# Display name used in every user-facing message of this bridge.
+model_name = "InternLM"
+# NOTE(review): placeholder install hint ("???") -- fill in the real command; the only
+# dependency probed below is `sentencepiece`.
+cmd_to_install = "`pip install ???`"
+# Text written to the chat window / observe window before the first reply arrives.
+load_message = f"{model_name}尚未加载，加载需要一段时间。注意，取决于`config.py`的配置，{model_name}消耗大量的内存（CPU）或显存（GPU），也许会导致低配计算机卡死 ……"
+def try_to_import_special_deps():
+    """Import the extra dependency (`sentencepiece`); the import error propagates if it is missing."""
+    import sentencepiece
+
+# Prompt templates filled in by combine_history(): one finished user turn, one finished
+# bot turn, and the open-ended current query that the model is asked to continue.
+user_prompt = "<|User|>:{user}<eoh>\n"
+robot_prompt = "<|Bot|>:{robot}<eoa>\n"
+cur_query_prompt = "<|User|>:{user}<eoh>\n<|Bot|>:"
+
+
+def combine_history(prompt, hist):
+    """
+    Build the full text prompt from past turns plus the current query.
+
+    `hist` is iterated as (user_text, bot_text) pairs; each pair is rendered with
+    `user_prompt` / `robot_prompt`, and `prompt` is appended last using
+    `cur_query_prompt`, which leaves the bot turn open for generation.
+    """
+    pieces = []
+    for turn in hist:
+        pieces.append(user_prompt.replace("{user}", turn[0]))
+        pieces.append(robot_prompt.replace("{robot}", turn[1]))
+    pieces.append(cur_query_prompt.replace("{user}", prompt))
+    return "".join(pieces)
+
+
+@Singleton
+class GetInternlmHandle(Process):
+    """
+    Process subclass that loads the InternLM chat model in `run` and answers
+    generation requests exchanged over a `multiprocessing.Pipe`.
+
+    `stream_chat` is the request side: it sends the keyword arguments through the
+    pipe and yields every reply until the '[Finish]' marker. `run` is the serving
+    side. The class is wrapped by `Singleton`, so the constructor is expected to
+    run only once per interpreter.
+    """
+
+    def __init__(self):
+        # Runs in the main process.
+        super().__init__(daemon=True)
+        self.parent, self.child = Pipe()    # parent end is used by stream_chat, child end by run
+        self._model = None
+        self._tokenizer = None
+        self.info = ""
+        self.success = True
+        self.check_dependency()
+        self.start()
+        # NOTE(review): the lock is created after start(), presumably so that it is not part
+        # of the state handed to the child process -- confirm before reordering.
+        self.threadLock = threading.Lock()
+
+    def ready(self):
+        # Runs in the main process. True once `self._model` is set.
+        # NOTE(review): `_model` is only assigned inside run(); verify that this flag can
+        # ever become True for callers in the main process.
+        return self._model is not None
+
+    def load_model_and_tokenizer(self):
+        """
+        (child process) Load "internlm/internlm-chat-7b" and its tokenizer.
+
+        The weights are cast to bfloat16 and moved to CUDA unless the configured
+        LOCAL_MODEL_DEVICE is 'cpu'. Returns a (model, tokenizer) pair; if a model
+        is already loaded the existing pair is returned instead of failing on
+        unbound locals.
+        """
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        device, = get_conf('LOCAL_MODEL_DEVICE')
+        if self._model is not None:
+            return self._model, self._tokenizer
+
+        tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
+        if device != 'cpu':
+            model = model.cuda()
+        model = model.eval()
+        return model, tokenizer
+
+    def llm_stream_generator(self, **kwargs):
+        """
+        (child process) Sample a reply token by token and yield the decoded text so far.
+
+        Expected keyword arguments: 'query' and 'history' (combined into the prompt
+        by `combine_history`); every keyword is also offered to the generation config
+        via `GenerationConfig.update`, so 'max_length', 'top_p' and 'temperature'
+        take effect there. The loop follows the streaming demo at
+        https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25
+        """
+        import torch
+        import logging
+        import copy
+        import warnings
+        import torch.nn as nn
+        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList
+
+        model = self._model
+        tokenizer = self._tokenizer
+        prompt = combine_history(kwargs['query'], kwargs['history'])
+        prefix_allowed_tokens_fn = None
+        # NOTE(review): extra end-of-sequence id, presumably the `<eoa>` token of this
+        # tokenizer -- confirm against the model's vocabulary.
+        additional_eos_token_id = 103028
+
+        inputs = tokenizer([prompt], padding=True, return_tensors="pt")
+        input_length = len(inputs["input_ids"][0])
+        for k, v in inputs.items():
+            # Follow the device the model actually lives on, so the CPU configuration works too.
+            inputs[k] = v.to(model.device)
+        input_ids = inputs["input_ids"]
+        input_ids_seq_length = input_ids.shape[-1]
+
+        # Work on a private copy of the generation config; update() returns the keywords it did not consume.
+        generation_config = copy.deepcopy(model.generation_config)
+        model_kwargs = generation_config.update(**kwargs)
+
+        eos_token_id = generation_config.eos_token_id
+        if eos_token_id is None:
+            eos_token_id = []
+        elif isinstance(eos_token_id, int):
+            eos_token_id = [eos_token_id]
+        if additional_eos_token_id is not None:
+            eos_token_id.append(additional_eos_token_id)
+
+        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
+        if has_default_max_length and generation_config.max_new_tokens is None:
+            warnings.warn(
+                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
+                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
+                " recommend using `max_new_tokens` to control the maximum length of the generation.",
+                UserWarning,
+            )
+        elif generation_config.max_new_tokens is not None:
+            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
+            if not has_default_max_length:
+                # logging.warning takes %-format arguments, not a warning category.
+                logging.warning(
+                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
+                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
+                    "Please refer to the documentation for more information. "
+                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
+                )
+
+        if input_ids_seq_length >= generation_config.max_length:
+            input_ids_string = "input_ids"
+            logging.warning(
+                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
+                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
+                " increasing `max_new_tokens`."
+            )
+
+        # Build the logits processors / warpers and stopping criteria from the config.
+        logits_processor = model._get_logits_processor(
+            generation_config=generation_config,
+            input_ids_seq_length=input_ids_seq_length,
+            encoder_input_ids=input_ids,
+            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
+            logits_processor=LogitsProcessorList(),
+        )
+        stopping_criteria = model._get_stopping_criteria(
+            generation_config=generation_config, stopping_criteria=StoppingCriteriaList()
+        )
+        logits_warper = model._get_logits_warper(generation_config)
+
+        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
+        scores = None
+        while True:
+            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
+            # forward pass to get the next token
+            outputs = model(
+                **model_inputs,
+                return_dict=True,
+                output_attentions=False,
+                output_hidden_states=False,
+            )
+
+            next_token_logits = outputs.logits[:, -1, :]
+
+            # pre-process the distribution
+            next_token_scores = logits_processor(input_ids, next_token_logits)
+            next_token_scores = logits_warper(input_ids, next_token_scores)
+
+            # sample (or take the arg-max when sampling is disabled)
+            probs = nn.functional.softmax(next_token_scores, dim=-1)
+            if generation_config.do_sample:
+                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
+            else:
+                next_tokens = torch.argmax(probs, dim=-1)
+
+            # update generated ids, model inputs, and length for the next step
+            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
+            model_kwargs = model._update_model_kwargs_for_generation(
+                outputs, model_kwargs, is_encoder_decoder=False
+            )
+            unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())
+
+            output_token_ids = input_ids[0].cpu().tolist()
+            output_token_ids = output_token_ids[input_length:]
+            # Hide a trailing end-of-sequence token; the emptiness guard avoids indexing
+            # an empty list when the very first generated token is an EOS token.
+            if output_token_ids and output_token_ids[-1] in eos_token_id:
+                output_token_ids = output_token_ids[:-1]
+            response = tokenizer.decode(output_token_ids)
+
+            yield response
+            # stop when each sentence is finished, or if we exceed the maximum length
+            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
+                return
+
+    def check_dependency(self):
+        """
+        Probe the optional dependency and record the outcome in `self.info` / `self.success`.
+
+        Called from __init__ before start(), i.e. in the main process.
+        """
+        try:
+            try_to_import_special_deps()
+            self.info = "依赖检测通过"
+            self.success = True
+        except Exception:
+            self.info = f"缺少{model_name}的依赖，如果要使用{model_name}，除了基础的pip依赖以外，您还需要运行{cmd_to_install}安装{model_name}的依赖。"
+            self.success = False
+
+    def run(self):
+        """
+        (child process) Load the model once, then serve requests from the pipe forever.
+
+        Every request is answered with zero or more partial replies followed by the
+        '[Finish]' marker. A load failure is reported through the pipe (also
+        terminated by '[Finish]', so the waiting caller does not block forever) and
+        then re-raised as RuntimeError.
+        """
+        # First run: load the parameters.
+        try:
+            self._model, self._tokenizer = self.load_model_and_tokenizer()
+        except Exception:
+            from toolbox import trimmed_format_exc
+            self.child.send(f'[Local Message] 不能正常加载{model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
+            self.child.send('[Finish]')
+            raise RuntimeError(f"不能正常加载{model_name}的参数！")
+
+        while True:
+            # Wait for the next task.
+            kwargs = self.child.recv()
+            # Request received: stream the partial replies back.
+            try:
+                for response_full in self.llm_stream_generator(**kwargs):
+                    self.child.send(response_full)
+            except Exception:
+                from toolbox import trimmed_format_exc
+                self.child.send(f'[Local Message] 调用{model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
+            # Request done: tell the caller and wait for the next one.
+            self.child.send('[Finish]')
+
+    def stream_chat(self, **kwargs):
+        """
+        (main process) Send one request to the worker and yield its replies until '[Finish]'.
+
+        The lock serialises concurrent callers. If the consumer stops early (the
+        generator is closed, or the caller raises while iterating), the remaining
+        replies of this request are drained before the lock is released, so the
+        next request does not read stale messages.
+        """
+        with self.threadLock:
+            self.parent.send(kwargs)
+            finished = False
+            try:
+                while True:
+                    res = self.parent.recv()
+                    if res == '[Finish]':
+                        finished = True
+                        break
+                    yield res
+            finally:
+                if not finished:
+                    try:
+                        while self.parent.recv() != '[Finish]':
+                            pass
+                    except (EOFError, OSError):
+                        # The pipe is gone; nothing left to drain.
+                        pass
+    
+    
+# ------------------------------------------------------------------------------------------------------------------------
+# 🔌💻 GPT-Academic
+# ------------------------------------------------------------------------------------------------------------------------
+def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
+    """
+        Multi-thread entry point; see request_llm/bridge_all.py for the description of the arguments.
+
+        Streams the reply of the InternLM worker, mirrors the latest partial reply into
+        observe_window[0] (when present) and returns the last reply received.
+        Raises RuntimeError when the dependency check failed, or when observe_window[1]
+        (a timestamp) is older than the watchdog patience.
+    """
+    handle = GetInternlmHandle()
+    if len(observe_window) > 0:
+        observe_window[0] = load_message + "\n\n" + handle.info
+    if not handle.success:
+        raise RuntimeError(handle.info)
+
+    # The system prompt is passed as the answer of a synthetic first turn, followed by
+    # the flat history list regrouped into [question, answer] pairs.
+    history_feedin = [["What can I do?", sys_prompt]]
+    history_feedin.extend([question, answer] for question, answer in zip(history[0::2], history[1::2]))
+
+    watch_dog_patience = 5  # seconds the watchdog tolerates without a refreshed timestamp
+    reply = ""
+    reply_stream = handle.stream_chat(
+        query=inputs,
+        history=history_feedin,
+        max_length=llm_kwargs['max_length'],
+        top_p=llm_kwargs['top_p'],
+        temperature=llm_kwargs['temperature'],
+    )
+    for reply in reply_stream:
+        if len(observe_window) > 0:
+            observe_window[0] = reply
+        if len(observe_window) > 1 and (time.time() - observe_window[1]) > watch_dog_patience:
+            raise RuntimeError("程序终止。")
+    return reply
+
+
+
+def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=None, system_prompt='', stream = True, additional_fn=None):
+    """
+        Single-thread entry point; see request_llm/bridge_all.py for the description of the arguments.
+
+        Generator that appends the exchange to `chatbot`, streams the partial replies
+        of the InternLM worker into the last chatbot entry and finally extends
+        `history` with [inputs, response]. `history` defaults to a fresh list per
+        call, so nothing is shared between calls that omit it.
+    """
+    if history is None:
+        history = []
+    chatbot.append((inputs, ""))
+
+    _llm_handle = GetInternlmHandle()
+    chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info)
+    yield from update_ui(chatbot=chatbot, history=[])
+    if not _llm_handle.success:
+        # Dependency check failed: the explanation is already shown in the chat window.
+        return
+
+    if additional_fn is not None:
+        import core_functional
+        importlib.reload(core_functional)    # hot-reload the prompts
+        core_functions = core_functional.get_core_functions()
+        # Apply the optional pre-processing hook, then wrap the input with prefix/suffix.
+        if "PreProcess" in core_functions[additional_fn]: inputs = core_functions[additional_fn]["PreProcess"](inputs)
+        inputs = core_functions[additional_fn]["Prefix"] + inputs + core_functions[additional_fn]["Suffix"]
+
+    # The system prompt is passed as the answer of a synthetic first turn, followed by
+    # the flat history list regrouped into [question, answer] pairs.
+    history_feedin = []
+    history_feedin.append(["What can I do?", system_prompt] )
+    for i in range(len(history)//2):
+        history_feedin.append([history[2*i], history[2*i+1]] )
+
+    # Stream the reply; the placeholder survives only if the worker yields nothing.
+    waiting_message = f"[Local Message]: 等待{model_name}响应中 ..."
+    response = waiting_message
+    for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
+        chatbot[-1] = (inputs, response)
+        yield from update_ui(chatbot=chatbot, history=history)
+
+    # Final bookkeeping.
+    if response == waiting_message:
+        response = f"[Local Message]: {model_name}响应异常 ..."
+    history.extend([inputs, response])
+    yield from update_ui(chatbot=chatbot, history=history)
--- a/request_llm/edge_gpt_free.py
+++ b/request_llm/edge_gpt_free.py
@ -447,6 +447,15 @@ class _ChatHub:
        """
        Ask a question to the bot
        """
+        req_header = HEADERS
+        if self.cookies is not None:
+            ws_cookies = []
+            for cookie in self.cookies:
+                ws_cookies.append(f"{cookie['name']}={cookie['value']}")
+            req_header.update({
+                'Cookie': ';'.join(ws_cookies),
+            })
+            
        timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(timeout=timeout)

@ -455,7 +464,7 @@ class _ChatHub:
        # Check if websocket is closed
        self.wss = await self.session.ws_connect(
            wss_link,
-            headers=HEADERS,
+            headers=req_header,
            ssl=ssl_context,
            proxy=self.proxy,
            autoping=False,
@ -1109,4 +1118,4 @@ class ImageQuery(Query):


 if __name__ == "__main__":
-    main()
+    main()
--- a/request_llm/test_llms.py
+++ b/request_llm/test_llms.py
@ -14,7 +14,8 @@ if __name__ == "__main__":
    # from request_llm.bridge_moss import predict_no_ui_long_connection
    # from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
    # from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection
-    from request_llm.bridge_claude import predict_no_ui_long_connection
+    # from request_llm.bridge_claude import predict_no_ui_long_connection
+    from request_llm.bridge_internlm import predict_no_ui_long_connection

    llm_kwargs = {
        'max_length': 512,
@ -22,45 +23,8 @@ if __name__ == "__main__":
        'temperature': 1,
    }

-    result = predict_no_ui_long_connection(inputs="你好", 
-                                        llm_kwargs=llm_kwargs,
-                                        history=[],
-                                        sys_prompt="")
+    result = predict_no_ui_long_connection( inputs="请问什么是质子？", 
+                                            llm_kwargs=llm_kwargs,
+                                            history=["你好", "我好！"],
+                                            sys_prompt="")
    print('final result:', result)
-
-
-
-    # # print(result)
-    # from multiprocessing import Process, Pipe
-    # class GetGLMHandle(Process):
-    #     def __init__(self):
-    #         super().__init__(daemon=True)
-    #         pass
-    #     def run(self):
-    #         # 子进程执行
-    #         # 第一次运行，加载参数
-    #         def validate_path():
-    #             import os, sys
-    #             dir_name = os.path.dirname(__file__)
-    #             root_dir_assume = os.path.abspath(os.path.dirname(__file__) +  '/..')
-    #             os.chdir(root_dir_assume + '/request_llm/jittorllms')
-    #             sys.path.append(root_dir_assume + '/request_llm/jittorllms')
-    #         validate_path() # validate path so you can run from base directory
-    #         jittorllms_model = None
-    #         import types
-    #         try:
-    #             if jittorllms_model is None:
-    #                 from models import get_model
-    #                 # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
-    #                 args_dict = {'model': 'chatrwkv'}
-    #                 print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
-    #                 jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
-    #                 print('done get model')
-    #         except:
-    #             # self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
-    #             raise RuntimeError("不能正常加载jittorllms的参数！")
-    # x = GetGLMHandle()
-    # x.start()
-
-
-    # input()
--- a/toolbox.py
+++ b/toolbox.py
@ -883,4 +883,16 @@ def objload(file='objdump.tmp'):
        return
    with open(file, 'rb') as f:
        return pickle.load(f)
-    
+    
+def Singleton(cls):
+    """
+    Class decorator that turns `cls` into a single-instance factory.
+
+    The first call builds the instance with the arguments given; every later call
+    ignores its arguments and returns that same instance.
+    """
+    created = {}
+
+    def _singleton(*args, **kargs):
+        try:
+            return created[cls]
+        except KeyError:
+            created[cls] = cls(*args, **kargs)
+            return created[cls]
+
+    return _singleton
Author	SHA1	Message	Date
binary-husky	f43cea08ef	no stream test	2023-07-20 12:11:22 +08:00
binary-husky	df90db210c	Merge branch 'master' into interface-interlm	2023-07-20 11:40:45 +08:00
binary-husky	0927ed20a2	edit default configuration	2023-07-20 11:39:35 +08:00
binary-husky	73b22f85be	compat third party gpt error handle	2023-07-20 11:09:22 +08:00
binary-husky	b8d77557b0	Update README.md	2023-07-20 10:12:42 +08:00
binary-husky	99b8fce8f3	Merge pull request #965 from QQisQQ/patch-2 解决new bing 报错200 (fix new bing error code 200 )	2023-07-19 10:15:15 +08:00
binary-husky	16364f1b2d	Merge pull request #966 from doujiang-zheng/master Add timestamp for chat_secrets.log and disable the verbose httpx log.	2023-07-19 10:14:36 +08:00
doujiang-zheng	3b88e00cfb	Add timestamp for chat_secrets.log and disable the verbose httpx log.	2023-07-19 09:43:59 +08:00
QQisQQ	0c8c539e9b	解决new bing 报错200 (fix new bing error code 200 ) modify from `16e00af9d5` works for my issue: ``` Traceback (most recent call last): File "./request_llm/bridge_newbingfree.py", line 152, in run asyncio.run(self.async_run()) File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 190, in run return runner.run(main) ^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/runners.py", line 118, in run return self._loop.run_until_complete(task) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py311/lib/python3.11/asyncio/base_events.py", line 653, in run_until_complete return future.result() ^^^^^^^^^^^^^^^ File "./request_llm/bridge_newbingfree.py", line 98, in async_run async for final, response in self.newbing_model.ask_stream( File "./request_llm/edge_gpt_free.py", line 676, in ask_stream async for response in self.chat_hub.ask_stream( File "./request_llm/edge_gpt_free.py", line 456, in ask_stream self.wss = await self.session.ws_connect( ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py311/lib/python3.11/site-packages/aiohttp/client.py", line 795, in _ws_connect raise WSServerHandshakeError( aiohttp.client_exceptions.WSServerHandshakeError: 200, message='Invalid response status', url=URL('wss://sydney.bing.com/sydney/ChatHub') ```	2023-07-19 04:39:15 +08:00
binary-husky	fd549fb986	merge success	2023-07-18 19:51:13 +08:00
binary-husky	babb775cfb	interface with interlm	2023-07-18 16:33:34 +08:00