解决用户文件上传冲突

update error message
Merge pull request #933 from binary-husky/master-latex-patch
2023-07-07 17:54:47 +08:00 · 2023-07-07 17:41:43 +08:00 · 2023-07-07 16:57:58 +08:00 · 2023-07-07 10:55:22 +08:00 · 2023-07-07 10:54:21 +08:00 · 2023-07-07 10:47:30 +08:00
10 changed files with 95 additions and 112 deletions
--- a/config.py
+++ b/config.py
@ -70,7 +70,7 @@ MAX_RETRY = 2

 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
-AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt35", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
+AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
 # P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "newbing-free", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]


@ -109,10 +109,10 @@ SLACK_CLAUDE_USER_TOKEN = ''


 # 如果需要使用AZURE 详情请见额外文档 docs\use_azure.md
-AZURE_ENDPOINT = "https://你的api名称.openai.azure.com/"
+AZURE_ENDPOINT = "https://你亲手写的api名称.openai.azure.com/"
 AZURE_API_KEY = "填入azure openai api的密钥"
-AZURE_API_VERSION = "填入api版本"
-AZURE_ENGINE = "填入ENGINE"
+AZURE_API_VERSION = "2023-05-15"        # 一般不修改
+AZURE_ENGINE = "填入你亲手写的部署名"     # 读 docs\use_azure.md


 # 使用Newbing
--- a/crazy_functions/crazy_utils.py
+++ b/crazy_functions/crazy_utils.py
@ -130,6 +130,11 @@ def request_gpt_model_in_new_thread_with_ui_alive(
    yield from update_ui(chatbot=chatbot, history=[]) # 如果最后成功了，则删除报错信息
    return final_result

+def can_multi_process(llm):
+    """Report whether the model named ``llm`` is allowed to run with several workers.
+
+    Returns True when the name starts with 'gpt-', 'api2d-' or 'azure-',
+    and False for any other name.
+    """
+    # str.startswith accepts a tuple, so one call checks all three prefixes
+    multi_worker_prefixes = ('gpt-', 'api2d-', 'azure-')
+    return llm.startswith(multi_worker_prefixes)

 def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array, inputs_show_user_array, llm_kwargs, 
@ -175,7 +180,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        except: max_workers = 8
        if max_workers <= 0: max_workers = 3
    # 屏蔽掉 chatglm的多线程，可能会导致严重卡顿
-    if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
+    if not can_multi_process(llm_kwargs['llm_model']):
        max_workers = 1
        
    executor = ThreadPoolExecutor(max_workers=max_workers)
--- a/crazy_functions/latex_utils.py
+++ b/crazy_functions/latex_utils.py
@ -189,6 +189,18 @@ def rm_comments(main_file):
    main_file = re.sub(r'(?<!\\)%.*', '', main_file)  # 使用正则表达式查找半行注释, 并替换为空字符串
    return main_file

+def find_tex_file_ignore_case(fp):
+    """Locate the Tex file referred to by ``fp``, tolerating a wrong letter case.
+
+    A '.tex' suffix is appended to the file name when ``fp`` lacks one. The
+    exact path is tried first; if it does not exist, the '*.tex' files in the
+    same directory are compared by lower-cased file name.
+
+    Returns the path of the matching file, or None when nothing matches.
+    """
+    dir_name = os.path.dirname(fp)
+    base_name = os.path.basename(fp)
+    if not base_name.endswith('.tex'): base_name+='.tex'
+    if os.path.exists(pj(dir_name, base_name)): return pj(dir_name, base_name)
+    # go case in-sensitive
+    import glob
+    wanted = base_name.lower()
+    # escape the directory so glob metacharacters in it are taken literally, and
+    # join rather than concatenate so an empty directory does not become '/*.tex'
+    for f in glob.glob(os.path.join(glob.escape(dir_name), '*.tex')):
+        # compare each candidate's own file name, so only a real match is returned
+        if os.path.basename(f).lower() == wanted: return f
+    return None
+
 def merge_tex_files_(project_foler, main_file, mode):
    """
    Merge Tex project recursively (inline the file referenced by every input command)
@ -197,15 +209,11 @ def merge_tex_files_(project_foler, main_file, mode):
    for s in reversed([q for q in re.finditer(r"\\input\{(.*?)\}", main_file, re.M)]):
        f = s.group(1)
        fp = os.path.join(project_foler, f)
-        if os.path.exists(fp):  
-            # e.g., \input{srcs/07_appendix.tex}
-            with open(fp, 'r', encoding='utf-8', errors='replace') as fx:
-                c = fx.read()
-        else:  
-            # e.g., \input{srcs/07_appendix}
-            assert os.path.exists(fp+'.tex'), f'即找不到{fp}，也找不到{fp}.tex，Tex源文件缺失！'
-            with open(fp+'.tex', 'r', encoding='utf-8', errors='replace') as fx:
-                c = fx.read()
+        fp_found = find_tex_file_ignore_case(fp)  # separate name: fp must stay intact for the error message
+        if fp_found:
+            with open(fp_found, 'r', encoding='utf-8', errors='replace') as fx: c = fx.read()
+        else:
+            raise RuntimeError(f'找不到{fp}，Tex源文件缺失！')
        c = merge_tex_files_(project_foler, c, mode)
        main_file = main_file[:s.span()[0]] + c + main_file[s.span()[1]:]
    return main_file
--- a/crazy_functions/总结word文档.py
+++ b/crazy_functions/总结word文档.py
@ -14,17 +14,19 @@ def 解析docx(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot
            doc = Document(fp)
            file_content = "\n".join([para.text for para in doc.paragraphs])
        else:
-            import win32com.client
-            word = win32com.client.Dispatch("Word.Application")
-            word.visible = False
-            # 打开文件
-            print('fp', os.getcwd())
-            doc = word.Documents.Open(os.getcwd() + '/' + fp)
-            # file_content = doc.Content.Text
-            doc = word.ActiveDocument
-            file_content = doc.Range().Text
-            doc.Close()
-            word.Quit()
+            try:
+                import win32com.client
+                word = win32com.client.Dispatch("Word.Application")
+                word.visible = False
+                # 打开文件
+                doc = word.Documents.Open(os.getcwd() + '/' + fp)
+                # file_content = doc.Content.Text
+                doc = word.ActiveDocument
+                file_content = doc.Range().Text
+                doc.Close()
+                word.Quit()
+            except:
+                raise RuntimeError('请先将.doc文档转换为.docx文档。')

        print(file_content)
        # private_upload里面的文件名在解压zip后容易出现乱码（rar和7z格式正常），故可以只分析文章内容，不输入文件名
--- a/docs/use_azure.md
+++ b/docs/use_azure.md
@ -90,62 +90,29 @@

 到现在为止，申请操作就完成了，需要记下来的有下面几个东西：

-● 密钥（1或2都可以）
+● 密钥（对应AZURE_API_KEY，1或2都可以）

-● 终结点
+● 终结点 （对应AZURE_ENDPOINT）
+
+● 部署名（对应AZURE_ENGINE，不是模型名）

-● 部署名（不是模型名）

 # 修改 config.py

 ```
-AZURE_ENDPOINT = "填入终结点"
+LLM_MODEL = "azure-gpt-3.5"  # 指定启动时的默认模型，当然事后从下拉菜单选也ok
+
+AZURE_ENDPOINT = "填入终结点" # 见上述图片
 AZURE_API_KEY = "填入azure openai api的密钥"
 AZURE_API_VERSION = "2023-05-15"  # 默认使用 2023-05-15 版本，无需修改
-AZURE_ENGINE = "填入部署名"
-
-```
-# API的使用
-
-接下来就是具体怎么使用API了，还是可以参考官方文档：[快速入门 - 开始通过 Azure OpenAI 服务使用 ChatGPT 和 GPT-4 - Azure OpenAI Service | Microsoft Learn](https://learn.microsoft.com/zh-cn/azure/cognitive-services/openai/chatgpt-quickstart?pivots=programming-language-python)
-
-和openai自己的api调用有点类似，都需要安装openai库，不同的是调用方式
-
-```
-import openai
-openai.api_type = "azure" #固定格式，无需修改
-openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT") #这里填入“终结点”
-openai.api_version = "2023-05-15" #固定格式，无需修改
-openai.api_key = os.getenv("AZURE_OPENAI_KEY") #这里填入“密钥1”或“密钥2”
-
-response = openai.ChatCompletion.create(
-    engine="gpt-35-turbo", #这里填入的不是模型名，是部署名
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "Does Azure OpenAI support customer managed keys?"},
-        {"role": "assistant", "content": "Yes, customer managed keys are supported by Azure OpenAI."},
-        {"role": "user", "content": "Do other Azure Cognitive Services support this too?"}
-    ]
-)
-
-print(response)
-print(response['choices'][0]['message']['content'])
+AZURE_ENGINE = "填入部署名" # 见上述图片

 ```

-需要注意的是：
-
-1.  engine那里填入的是部署名，不是模型名
-
-2.  通过openai库获得的这个 response 和通过 request 库访问 url 获得的 response 不同，不需要 decode，已经是解析好的 json 了，直接根据键值读取即可。
-
-更细节的使用方法，详见官方API文档。

 # 关于费用

-Azure OpenAI API 还是需要一些费用的（免费订阅只有1个月有效期），费用如下：
-
-![image.png](https://note.youdao.com/yws/res/18095/WEBRESOURCEeba0ab6d3127b79e143ef2d5627c0e44)
+Azure OpenAI API 还是需要一些费用的（免费订阅只有1个月有效期）

 具体可以看这个网址：[Azure OpenAI 服务 - 定价| Microsoft Azure](https://azure.microsoft.com/zh-cn/pricing/details/cognitive-services/openai-service/?cdn=disable)

--- a/main.py
+++ b/main.py
@ -155,7 +155,7 @@ def main():
            click_handle = functional[k]["Button"].click(fn=ArgsGeneralWrapper(predict), inputs=[*input_combo, gr.State(True), gr.State(k)], outputs=output_combo)
            cancel_handles.append(click_handle)
        # 文件上传区，接收文件后与chatbot的互动
-        file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes], [chatbot, txt, txt2])
+        file_upload.upload(on_file_uploaded, [file_upload, chatbot, txt, txt2, checkboxes, cookies], [chatbot, txt, txt2])
        # 函数插件-固定按钮区
        for k in crazy_fns:
            if not crazy_fns[k].get("AsButton", True): continue
@ -185,6 +185,12 @@ def main():
        # 终止按钮的回调函数注册
        stopBtn.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
        stopBtn2.click(fn=None, inputs=None, outputs=None, cancels=cancel_handles)
+        def init_cookie(cookies, chatbot):
+            """Give the visiting user a unique id: store 'user-<uuid4 hex>' under 'user-uuid' and return cookies.
+
+            ``chatbot`` is accepted but not used here.
+            """
+            import uuid
+            # uuid4 is random, so every call yields a different identifier
+            user_tag = 'user-' + uuid.uuid4().hex
+            cookies.update({'user-uuid': user_tag})
+            return cookies
+        demo.load(init_cookie, inputs=[cookies, chatbot], outputs=[cookies])

    # gradio的inbrowser触发不太稳定，回滚代码到原始的浏览器打开函数
    def auto_opentab_delay():
--- a/request_llm/bridge_all.py
+++ b/request_llm/bridge_all.py
@ -121,7 +121,7 @@ model_info = {
    },

    # azure openai
-    "azure-gpt35":{
+    "azure-gpt-3.5":{
        "fn_with_ui": azure_ui,
        "fn_without_ui": azure_noui,
        "endpoint": get_conf("AZURE_ENDPOINT"),
--- a/request_llm/bridge_azure_test.py
+++ b/request_llm/bridge_azure_test.py
@ -14,7 +14,8 @@ import traceback
 import importlib
 import openai
 import time
-
+import requests
+import json

 # 读取config.py文件中关于AZURE OPENAI API的信息
 from toolbox import get_conf, update_ui, clip_history, trimmed_format_exc
@ -43,7 +44,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
    chatbot 为WebUI中显示的对话列表，修改它，然后yeild出去，可以直接修改对话界面内容
    additional_fn代表点击的哪个按钮，按钮见functional.py
    """
-    print(llm_kwargs["llm_model"])    

    if additional_fn is not None:
        import core_functional
@ -56,7 +56,6 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
    logging.info(f'[raw_input] {raw_input}')
    chatbot.append((inputs, ""))
    yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
-
    
    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt, stream)    
        
@ -64,20 +63,22 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp

    retry = 0
    while True:
-        try:            
-                
+        try:
            openai.api_type = "azure"            
            openai.api_version = AZURE_API_VERSION
            openai.api_base = AZURE_ENDPOINT
            openai.api_key = AZURE_API_KEY
            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break
-        
+        except openai.error.AuthenticationError:
+            tb_str = '```\n' + trimmed_format_exc() + '```'
+            chatbot[-1] = [chatbot[-1][0], tb_str]
+            yield from update_ui(chatbot=chatbot, history=history, msg="openai返回错误") # 刷新界面
+            return
        except:
            retry += 1
-            chatbot[-1] = ((chatbot[-1][0], "获取response失败，重试中。。。"))
-            retry_msg = f"，正在重试 ({retry}/{MAX_RETRY}) ……" if MAX_RETRY > 0 else ""
-            yield from update_ui(chatbot=chatbot, history=history, msg="请求超时"+retry_msg) # 刷新界面
+            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
+            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')
            
    gpt_replying_buffer = ""    
    is_head_of_the_stream = True
@ -141,20 +142,17 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
    payload = generate_azure_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
    retry = 0
    while True:
-
        try:
            openai.api_type = "azure"            
            openai.api_version = AZURE_API_VERSION
            openai.api_base = AZURE_ENDPOINT
            openai.api_key = AZURE_API_KEY
            response = openai.ChatCompletion.create(timeout=TIMEOUT_SECONDS, **payload);break
-        
-        except:  
+        except:
            retry += 1
            traceback.print_exc()
            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')     
-        
+            if MAX_RETRY!=0: print(f'请求超时，正在重试 ({retry}/{MAX_RETRY}) ……')

    stream_response =  response
    result = ''
@ -164,19 +162,14 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
            break
        except:
            chunk = next(stream_response) # 失败了，重试一次？再失败就没办法了。
-
        if len(chunk)==0: continue
-        if not chunk.startswith('data:'): 
-            error_msg = get_full_error(chunk, stream_response)
-            if "reduce the length" in error_msg:
-                raise ConnectionAbortedError("AZURE OPENAI API拒绝了请求:" + error_msg)
-            else:
-                raise RuntimeError("AZURE OPENAI API拒绝了请求：" + error_msg)
-        if ('data: [DONE]' in chunk): break 
-        
-        delta = chunk["delta"]
-        if len(delta) == 0: break
-        if "role" in delta: continue
+
+        json_data = json.loads(str(chunk))['choices'][0]
+        delta = json_data["delta"]
+        if len(delta) == 0:
+            break
+        if "role" in delta:
+            continue
        if "content" in delta: 
            result += delta["content"]
            if not console_slience: print(delta["content"], end='')
@ -184,11 +177,14 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
                # 观测窗，把已经获取的数据显示出去
                if len(observe_window) >= 1: observe_window[0] += delta["content"]
                # 看门狗，如果超过期限没有喂狗，则终止
-                if len(observe_window) >= 2:  
+                if len(observe_window) >= 2000:  
                    if (time.time()-observe_window[1]) > watch_dog_patience:
                        raise RuntimeError("用户取消了程序。")
-        else: raise RuntimeError("意外Json结构："+delta)
-    if chunk['finish_reason'] == 'length':
+        else:
+            raise RuntimeError("意外Json结构："+delta)
+    if json_data['finish_reason'] == 'content_filter':
+        raise RuntimeError("由于提问含不合规内容被Azure过滤。")
+    if json_data['finish_reason'] == 'length':
        raise ConnectionAbortedError("正常结束，但显示Token不足，导致输出不完整，请削减单次输入的文本量。")
    return result

--- a/toolbox.py
+++ b/toolbox.py
@ -452,7 +452,7 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
        else: current = []
        chatbot._cookies.update({'file_to_promote': [new_path] + current})

-def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
+def on_file_uploaded(files, chatbot, txt, txt2, checkboxes, cookies):
    """
    当文件被上传时的回调函数
    """
@ -463,24 +463,23 @@ def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
    import time
    import glob
    from toolbox import extract_archive
-    try:
-        shutil.rmtree('./private_upload/')
-    except:
-        pass
+    user_uuid = cookies.get('user-uuid', 'unknown_user')
+    try: shutil.rmtree(f'./private_upload/{user_uuid}')
+    except: pass
    time_tag = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
-    os.makedirs(f'private_upload/{time_tag}', exist_ok=True)
+    os.makedirs(f'private_upload/{user_uuid}/{time_tag}', exist_ok=True)
    err_msg = ''
    for file in files:
        file_origin_name = os.path.basename(file.orig_name)
-        shutil.copy(file.name, f'private_upload/{time_tag}/{file_origin_name}')
-        err_msg += extract_archive(f'private_upload/{time_tag}/{file_origin_name}',
-                                   dest_dir=f'private_upload/{time_tag}/{file_origin_name}.extract')
-    moved_files = [fp for fp in glob.glob('private_upload/**/*', recursive=True)]
+        shutil.copy(file.name, f'private_upload/{user_uuid}/{time_tag}/{file_origin_name}')
+        err_msg += extract_archive(f'private_upload/{user_uuid}/{time_tag}/{file_origin_name}',
+                                   dest_dir=f'private_upload/{user_uuid}/{time_tag}/{file_origin_name}.extract')
+    moved_files = [fp for fp in glob.glob(f'private_upload/{user_uuid}/**/*', recursive=True)]
    if "底部输入区" in checkboxes:
        txt = ""
-        txt2 = f'private_upload/{time_tag}'
+        txt2 = f'private_upload/{user_uuid}/{time_tag}'
    else:
-        txt = f'private_upload/{time_tag}'
+        txt = f'private_upload/{user_uuid}/{time_tag}'
        txt2 = ""
    moved_files_str = '\t\n\n'.join(moved_files)
    chatbot.append(['我上传了文件，请查收',
--- a/4
+++ b/4
@ -1,5 +1,5 @@
 {
-  "version": 3.42,
+  "version": 3.43,
  "show_feature": true,
-  "new_feature": "完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件 <-> 添加了OpenAI音频转文本总结插件 <-> 通过Slack添加对Claude的支持"
+  "new_feature": "修复Azure接口的BUG <-> 完善多语言模块 <-> 完善本地Latex矫错和翻译功能 <-> 增加gpt-3.5-16k的支持 <-> 新增最强Arxiv论文翻译插件 <-> 修复gradio复制按钮BUG <-> 修复PDF翻译的BUG, 新增HTML中英双栏对照 <-> 添加了OpenAI图片生成插件"
 }
Author	SHA1	Message	Date
qingxu fu	cefa3921c4	解决用户文件上传冲突	2023-07-07 17:54:47 +08:00
qingxu fu	67d9051890	update error message	2023-07-07 17:41:43 +08:00
binary-husky	be96232127	Merge pull request #933 from binary-husky/master-latex-patch Latex File Name Bug Patch	2023-07-07 16:57:58 +08:00
binary-husky	3b5bc7a784	Update use_azure.md	2023-07-07 10:55:22 +08:00
binary-husky	5e92f437a1	Update use_azure.md	2023-07-07 10:54:21 +08:00
qingxu fu	eabd9d312f	3.43	2023-07-07 10:47:30 +08:00
qingxu fu	0da6fe78ac	统一azure-gpt-3.5的格式	2023-07-07 10:45:11 +08:00
qingxu fu	be990380a0	Merge branch 'master' of https://github.com/binary-husky/chatgpt_academic into master	2023-07-07 10:42:41 +08:00
qingxu fu	9c0bc48420	修复Azure OpenAI接口的各种bug	2023-07-07 10:42:38 +08:00
binary-husky	5c0d34793e	Latex File Name Bug Patch	2023-07-07 00:09:50 +08:00