个人助理

version 3.47
Merge pull request #979 from fenglui/master
2023-07-26 17:02:40 +08:00 · 2023-07-24 19:58:47 +08:00 · 2023-07-24 19:52:27 +08:00 · 2023-07-24 18:50:29 +08:00 · 2023-07-24 18:19:57 +08:00 · 2023-07-24 18:06:15 +08:00
9 changed files with 82 additions and 21 deletions
--- a/README.md
+++ b/README.md
@ -93,7 +93,7 @@ Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法

 1. 下载项目
 ```sh
-git clone https://github.com/binary-husky/gpt_academic.git
+git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
 cd gpt_academic
 ```

@ -126,7 +126,7 @@ python -m pip install -r request_llm/requirements_chatglm.txt

 # 【可选步骤II】支持复旦MOSS
 python -m pip install -r request_llm/requirements_moss.txt
-git clone https://github.com/OpenLMLab/MOSS.git request_llm/moss  # 注意执行此行代码时，必须处于项目根路径
+git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llm/moss  # 注意执行此行代码时，必须处于项目根路径

 # 【可选步骤III】确保config.py配置文件的AVAIL_LLM_MODELS包含了期望的模型，目前支持的全部模型如下(jittorllms系列目前仅支持docker方案)：
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "newbing", "moss"] # + ["jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
@ -149,7 +149,7 @@ python main.py
 [![basiclatex](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml/badge.svg?branch=master)](https://github.com/binary-husky/gpt_academic/actions/workflows/build-with-latex.yml)

 ``` sh
-git clone https://github.com/binary-husky/gpt_academic.git  # 下载项目
+git clone --depth=1 https://github.com/binary-husky/gpt_academic.git  # 下载项目
 cd gpt_academic                                 # 进入路径
 nano config.py                                      # 用任意文本编辑器编辑config.py, 配置 “Proxy”， “API_KEY” 以及 “WEB_PORT” (例如50923) 等
 docker build -t gpt-academic .                      # 安装
--- a/config.py
+++ b/config.py
@ -80,6 +80,7 @@ ChatGLM_PTUNING_CHECKPOINT = "" # 例如"/home/hmp/ChatGLM2-6B/ptuning/output/6b

 # 本地LLM模型如ChatGLM的执行方式 CPU/GPU
 LOCAL_MODEL_DEVICE = "cpu" # 可选 "cuda"
+LOCAL_MODEL_QUANT = "FP16" # 默认 "FP16" "INT4" 启用量化INT4版本 "INT8" 启用量化INT8版本


 # 设置gradio的并行线程数（不需要修改）
@ -136,4 +137,8 @@ ALIYUN_APPKEY=""   # 例如 RoPlZrM88DnAFkZK


 # Claude API KEY
-ANTHROPIC_API_KEY = ""
+ANTHROPIC_API_KEY = ""
+
+
+# 自定义API KEY格式
+CUSTOM_API_KEY_PATTERN = ""
--- a/crazy_functions/latex_fns/latex_actions.py
+++ b/crazy_functions/latex_fns/latex_actions.py
@ -22,7 +22,8 @@ def split_subprocess(txt, project_folder, return_dict, opts):
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM

    # 吸收title与作者以上的部分
-    text, mask = set_forbidden_text(text, mask, r"(.*?)\\maketitle", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
+    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
    # 吸收iffalse注释
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # 吸收在42行以内的begin-end组合
--- a/crazy_functions/live_audio/aliyunASR.py
+++ b/crazy_functions/live_audio/aliyunASR.py
@ -19,7 +19,7 @@ class AliyunASR():
        pass

    def test_on_error(self, message, *args):
-        # print("on_error args=>{}".format(args))
+        print("on_error args=>{}".format(args))
        pass

    def test_on_close(self, *args):
@ -50,6 +50,8 @@ class AliyunASR():
        rad.clean_up()
        temp_folder = tempfile.gettempdir()
        TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
+        if len(TOKEN) == 0:
+            TOKEN = self.get_token()
        self.aliyun_service_ok = True
        URL="wss://nls-gateway.aliyuncs.com/ws/v1"
        sr = nls.NlsSpeechTranscriber(
@ -91,3 +93,38 @@ class AliyunASR():
                self.stop = True
                self.stop_msg = 'Aliyun音频服务异常，请检查ALIYUN_TOKEN和ALIYUN_APPKEY是否过期。'
        r = sr.stop()
+
+    def get_token(self):
+        """Request a temporary Aliyun NLS access token via the CreateToken action.
+
+        Reads ALIYUN_ACCESSKEY / ALIYUN_SECRET through get_conf and returns the token id.
+        Raises RuntimeError if the request fails or the response carries no Token.Id.
+        """
+        from toolbox import get_conf
+        import json
+        from aliyunsdkcore.request import CommonRequest
+        from aliyunsdkcore.client import AcsClient
+        AccessKey_ID, AccessKey_secret = get_conf('ALIYUN_ACCESSKEY', 'ALIYUN_SECRET')
+        # Create the AcsClient instance.
+        client = AcsClient(AccessKey_ID, AccessKey_secret, "cn-shanghai")
+
+        # Build the request and set its parameters.
+        request = CommonRequest()
+        request.set_method('POST')
+        request.set_domain('nls-meta.cn-shanghai.aliyuncs.com')
+        request.set_version('2019-02-28')
+        request.set_action_name('CreateToken')
+
+        try:
+            response = client.do_action_with_exception(request)
+            jss = json.loads(response)
+        except Exception as e:
+            # Fail loudly here: there is no token to return, so carrying on would only break later.
+            raise RuntimeError("Failed to obtain an Aliyun NLS token: {}".format(e)) from e
+
+        token_info = jss.get('Token') if isinstance(jss, dict) else None
+        if not isinstance(token_info, dict) or 'Id' not in token_info:
+            raise RuntimeError("Aliyun CreateToken response does not contain Token.Id")
+        # Log only the expiry; the token itself is a credential and is kept out of stdout.
+        print("expireTime = " + str(token_info.get('ExpireTime')))
+        return token_info['Id']
--- a/crazy_functions/语音助手.py
+++ b/crazy_functions/语音助手.py
@ -179,12 +179,12 @@ def 语音助手(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt
        import nls
        from scipy import io
    except:
-        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
+        chatbot.append(["导入依赖失败", "使用该模块需要额外依赖, 安装方法:```pip install --upgrade aliyun-python-sdk-core==2.13.3 pyOpenSSL scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git```"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return

-    TOKEN, APPKEY = get_conf('ALIYUN_TOKEN', 'ALIYUN_APPKEY')
-    if TOKEN == "" or APPKEY == "":
+    APPKEY = get_conf('ALIYUN_APPKEY')
+    if APPKEY == "":
        chatbot.append(["导入依赖失败", "没有阿里云语音识别APPKEY和TOKEN, 详情见https://help.aliyun.com/document_detail/450255.html"])
        yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
        return
--- a/request_llm/bridge_chatglm.py
+++ b/request_llm/bridge_chatglm.py
@ -37,15 +37,23 @@ class GetGLMHandle(Process):
        # 子进程执行
        # 第一次运行，加载参数
        retry = 0
+        LOCAL_MODEL_QUANT, device = get_conf('LOCAL_MODEL_QUANT', 'LOCAL_MODEL_DEVICE')
+
+        if LOCAL_MODEL_QUANT == "INT4":         # INT4
+            _model_name_ = "THUDM/chatglm2-6b-int4"
+        elif LOCAL_MODEL_QUANT == "INT8":       # INT8
+            _model_name_ = "THUDM/chatglm2-6b-int8"
+        else:
+            _model_name_ = "THUDM/chatglm2-6b"  # FP16
+
        while True:
            try:
                if self.chatglm_model is None:
-                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
-                    device, = get_conf('LOCAL_MODEL_DEVICE')
+                    self.chatglm_tokenizer = AutoTokenizer.from_pretrained(_model_name_, trust_remote_code=True)
                    if device=='cpu':
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).float()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).float()
                    else:
-                        self.chatglm_model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True).half().cuda()
+                        self.chatglm_model = AutoModel.from_pretrained(_model_name_, trust_remote_code=True).half().cuda()
                    self.chatglm_model = self.chatglm_model.eval()
                    break
                else:
--- a/request_llm/edge_gpt_free.py
+++ b/request_llm/edge_gpt_free.py
@ -519,7 +519,11 @@ class _ChatHub:
        resp_txt_no_link = ""
        while not final:
            msg = await self.wss.receive()
-            objects = msg.data.split(DELIMITER)
+            try:
+                objects = msg.data.split(DELIMITER)
+            except :
+                continue
+            
            for obj in objects:
                if obj is None or not obj:
                    continue
--- a/toolbox.py
+++ b/toolbox.py
@ -538,7 +538,11 @@ def load_chat_cookies():
    return {'api_key': API_KEY, 'llm_model': LLM_MODEL}

 def is_openai_api_key(key):
-    API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
+    CUSTOM_API_KEY_PATTERN, = get_conf('CUSTOM_API_KEY_PATTERN')
+    if len(CUSTOM_API_KEY_PATTERN) != 0:
+        API_MATCH_ORIGINAL = re.match(CUSTOM_API_KEY_PATTERN, key)
+    else:
+        API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
    return bool(API_MATCH_ORIGINAL)

 def is_azure_api_key(key):
@ -594,7 +598,7 @@ def select_api_key(keys, llm_model):
            if is_azure_api_key(k): avail_key_list.append(k)

    if len(avail_key_list) == 0:
-        raise RuntimeError(f"您提供的api-key不满足要求，不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源（右下角更换模型菜单中可切换openai,azure和api2d请求源）")
+        raise RuntimeError(f"您提供的api-key不满足要求，不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源（右下角更换模型菜单中可切换openai,azure,claude,api2d等请求源）。")

    api_key = random.choice(avail_key_list) # 随机负载均衡
    return api_key
@ -670,13 +674,14 @@ def read_single_conf_with_lru_cache(arg):

    # 在读取API_KEY时，检查一下是不是忘了改config
    if arg == 'API_KEY':
-        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和API2D的api-key。也支持同时填写多个api-key，如API_KEY=\"openai-key1,openai-key2,api2d-key3\"")
+        print亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key，如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
        print亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s)，也可以在问题输入区输入临时的api-key(s)，然后回车键提交后即可生效。")
        if is_any_api_key(r):
            print亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
        else:
-            print亮红( "[API_KEY] 正确的 API_KEY 是'sk'开头的51位密钥（OpenAI），或者 'fk'开头的41位密钥，请在config文件中修改API密钥之后再运行。")
+            print亮红( "[API_KEY] 您的 API_KEY 不满足任何一种已知的密钥格式，请在config文件中修改API密钥之后再运行。")
    if arg == 'proxies':
+        if not read_single_conf_with_lru_cache('USE_PROXY'): r = None   # 检查USE_PROXY，防止proxies单独起作用
        if r is None:
            print亮红('[PROXY] 网络代理状态：未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议：检查USE_PROXY选项是否修改。')
        else:
@ -685,6 +690,7 @@ def read_single_conf_with_lru_cache(arg):
    return r


+@lru_cache(maxsize=128)
 def get_conf(*args):
    # 建议您复制一个config_private.py放自己的秘密, 如API和代理网址, 避免不小心传github被别人看到
    res = []
--- a/6
+++ b/6
@ -1,5 +1,5 @@
 {
-  "version": 3.46,
+  "version": 3.47,
  "show_feature": true,
-  "new_feature": "临时修复theme的文件丢失问题 <-> 新增实时语音对话插件（自动断句，脱手对话） <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件"
-}
+  "new_feature": "优化一键升级 <-> 提高arxiv翻译速度和成功率 <-> 支持自定义APIKEY格式 <-> 临时修复theme的文件丢失问题 <-> 新增实时语音对话插件（自动断句，脱手对话） <-> 支持加载自定义的ChatGLM2微调模型 <-> 动态ChatBot窗口高度 <-> 修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持"
+}
Author	SHA1	Message	Date
qingxu fu	b6439711c3	个人助理	2023-07-26 17:02:40 +08:00
binary-husky	e4e2430255	version 3.47	2023-07-24 19:58:47 +08:00
binary-husky	1732127a28	Merge pull request #979 from fenglui/master 增加chatGLM int4配置支持小显存也可以选择chatGLM	2023-07-24 19:52:27 +08:00
binary-husky	56bb8b6498	improve re efficiency	2023-07-24 18:50:29 +08:00
binary-husky	e93b6fa3a6	Add GLM INT8	2023-07-24 18:19:57 +08:00
binary-husky	dd4ba0ea22	Merge branch 'master' of https://github.com/fenglui/gpt_academic into fenglui-master	2023-07-24 18:06:15 +08:00
binary-husky	c2701c9ce5	Merge pull request #986 from one-pr/git-clone 默认仅 clone 最新的代码，减小 git clone 的大小	2023-07-24 17:48:35 +08:00
woclass	2f019ce359	优化 README.md 中的其他 git clone	2023-07-24 15:14:48 +08:00
woclass	c5b147aeb7	默认仅 clone 最新的代码，减小 git clone 的大小	2023-07-24 15:14:42 +08:00
fenglui	5813d65e52	增加chatGLM int4配置支持小显存也可以选择chatGLM	2023-07-22 08:29:15 +08:00
binary-husky	a393edfaa4	ALLOW CUSTOM API KEY PATTERN	2023-07-21 22:49:07 +08:00
binary-husky	dd7a01cda5	Merge pull request #976 from fenglui/master fix msg.data.split(DELIMITER) exception when msg.data is int	2023-07-21 17:02:29 +08:00
fenglui	00a3b91f95	fix msg.data.split(DELIMITER) exception when msg.data is int	2023-07-21 03:51:33 +08:00