Compare commits

threejs-ap ... no-stream

11 Commits

| SHA1 |
|---|
| f43cea08ef |
| df90db210c |
| 0927ed20a2 |
| 73b22f85be |
| b8d77557b0 |
| 99b8fce8f3 |
| 16364f1b2d |
| 3b88e00cfb |
| 0c8c539e9b |
| fd549fb986 |
| babb775cfb |
@@ -44,7 +44,7 @@ chat分析报告生成 | [函数插件] 运行后自动生成总结汇报
 Latex论文一键校对 | [函数插件] 仿Grammarly对Latex文章进行语法、拼写纠错+输出对照PDF
 [谷歌学术统合小助手](https://www.bilibili.com/video/BV19L411U7ia) | [函数插件] 给定任意谷歌学术搜索页面URL,让gpt帮你[写relatedworks](https://www.bilibili.com/video/BV1GP411U7Az/)
 互联网信息聚合+GPT | [函数插件] 一键[让GPT从互联网获取信息](https://www.bilibili.com/video/BV1om4y127ck)回答问题,让信息永不过时
-⭐Arxiv论文精细翻译 | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
+⭐Arxiv论文精细翻译 ([Docker](https://github.com/binary-husky/gpt_academic/pkgs/container/gpt_academic_with_latex)) | [函数插件] 一键[以超高质量翻译arxiv论文](https://www.bilibili.com/video/BV1dz4y1v77A/),目前最好的论文翻译工具
 ⭐[实时语音对话输入](https://github.com/binary-husky/gpt_academic/blob/master/docs/use_audio.md) | [函数插件] 异步[监听音频](https://www.bilibili.com/video/BV1AV4y187Uy/),自动断句,自动寻找回答时机
 公式/图片/表格显示 | 可以同时显示公式的[tex形式和渲染形式](https://user-images.githubusercontent.com/96192199/230598842-1d7fcddd-815d-40ee-af60-baf488a199df.png),支持公式、代码高亮
 多线程函数插件支持 | 支持多线调用chatgpt,一键处理[海量文本](https://www.bilibili.com/video/BV1FT411H7c5/)或程序
@@ -32,9 +32,9 @@ else:
 
 # ------------------------------------ 以下配置可以优化体验, 但大部分场合下并不需要修改 ------------------------------------
 
-# 重新URL重新定向,实现更换API_URL的作用(常规情况下,不要修改!! 高危设置!通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
-# 格式 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
-# 例如 API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":"https://reverse-proxy-url/v1/chat/completions"}
+# 重新URL重新定向,实现更换API_URL的作用(高危设置! 常规情况下不要修改! 通过修改此设置,您将把您的API-KEY和对话隐私完全暴露给您设定的中间人!)
+# 格式: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "在这里填写重定向的api.openai.com的URL"}
+# 举例: API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions": "https://reverse-proxy-url/v1/chat/completions"}
 API_URL_REDIRECT = {}
 
 
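The redirect table above maps the official endpoint to a user-supplied proxy. As a minimal sketch (the `resolve_endpoint` helper below is hypothetical and not part of this diff), this is how such a table is typically consulted before a request is sent:

```python
# Hypothetical helper illustrating how API_URL_REDIRECT is meant to be used;
# the project resolves the endpoint elsewhere, this is only a sketch.
API_URL_REDIRECT = {"https://api.openai.com/v1/chat/completions":
                    "https://reverse-proxy-url/v1/chat/completions"}

def resolve_endpoint(url: str) -> str:
    # Fall back to the original URL when no redirect is configured.
    return API_URL_REDIRECT.get(url, url)

print(resolve_endpoint("https://api.openai.com/v1/chat/completions"))
# -> https://reverse-proxy-url/v1/chat/completions
```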
@@ -71,7 +71,7 @@ MAX_RETRY = 2
 # 模型选择是 (注意: LLM_MODEL是默认选中的模型, 它*必须*被包含在AVAIL_LLM_MODELS列表中 )
 LLM_MODEL = "gpt-3.5-turbo" # 可选 ↓↓↓
 AVAIL_LLM_MODELS = ["gpt-3.5-turbo-16k", "gpt-3.5-turbo", "azure-gpt-3.5", "api2d-gpt-3.5-turbo", "gpt-4", "api2d-gpt-4", "chatglm", "moss", "newbing", "stack-claude"]
-# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
+# P.S. 其他可用的模型还包括 ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "claude-1-100k", "claude-2", "internlm", "jittorllms_rwkv", "jittorllms_pangualpha", "jittorllms_llama"]
 
 
 # ChatGLM(2) Finetune Model Path (如果使用ChatGLM2微调模型,需要把"chatglmft"加入AVAIL_LLM_MODELS中)
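Listing "internlm" in the P.S. comment is informational only; to actually enable the new backend, the model name still has to be added to AVAIL_LLM_MODELS, since the registration in bridge_all.py below is guarded by `if "internlm" in AVAIL_LLM_MODELS:`. A shortened, purely illustrative configuration:

```python
# Illustrative values only; keep the full list from config.py in practice.
LLM_MODEL = "gpt-3.5-turbo"   # default model, must be a member of the list below
AVAIL_LLM_MODELS = ["gpt-3.5-turbo", "gpt-4", "internlm"]
```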
main.py (6 changes)

@@ -22,8 +22,10 @@ def main():
     # 问询记录, python 版本建议3.9+(越新越好)
     import logging, uuid
     os.makedirs("gpt_log", exist_ok=True)
-    try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8")
-    except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO)
+    try:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, encoding="utf-8", format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    except:logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO, format="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    # Disable logging output from the 'httpx' logger
+    logging.getLogger("httpx").setLevel(logging.WARNING)
     print("所有问询记录将自动保存在本地目录./gpt_log/chat_secrets.log, 请注意自我隐私保护哦!")
 
     # 一些普通功能模块
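The try/except exists because the `encoding` argument of `logging.basicConfig` is only available on Python 3.9+; both branches now share the same timestamped format, and the httpx logger is silenced so its per-request INFO lines do not flood the chat log. A standalone sketch of the resulting setup (same paths and format strings as the diff):

```python
import logging, os

os.makedirs("gpt_log", exist_ok=True)
logging.basicConfig(filename="gpt_log/chat_secrets.log", level=logging.INFO,
                    format="%(asctime)s %(levelname)-8s %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S")
# Hide httpx's request spam from the chat log.
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.info("query received")  # -> "2023-07-16 12:00:00 INFO     query received"
```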
@@ -248,7 +248,6 @@ if "moss" in AVAIL_LLM_MODELS:
 if "stack-claude" in AVAIL_LLM_MODELS:
     from .bridge_stackclaude import predict_no_ui_long_connection as claude_noui
     from .bridge_stackclaude import predict as claude_ui
-    # claude
     model_info.update({
         "stack-claude": {
             "fn_with_ui": claude_ui,
@@ -263,7 +262,6 @@ if "newbing-free" in AVAIL_LLM_MODELS:
     try:
         from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
         from .bridge_newbingfree import predict as newbingfree_ui
-        # claude
         model_info.update({
             "newbing-free": {
                 "fn_with_ui": newbingfree_ui,
@@ -280,7 +278,6 @@ if "newbing" in AVAIL_LLM_MODELS: # same with newbing-free
     try:
         from .bridge_newbingfree import predict_no_ui_long_connection as newbingfree_noui
         from .bridge_newbingfree import predict as newbingfree_ui
-        # claude
         model_info.update({
             "newbing": {
                 "fn_with_ui": newbingfree_ui,
@@ -297,7 +294,6 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
     try:
         from .bridge_chatglmft import predict_no_ui_long_connection as chatglmft_noui
         from .bridge_chatglmft import predict as chatglmft_ui
-        # claude
         model_info.update({
             "chatglmft": {
                 "fn_with_ui": chatglmft_ui,
@@ -310,7 +306,22 @@ if "chatglmft" in AVAIL_LLM_MODELS: # same with newbing-free
         })
     except:
         print(trimmed_format_exc())
+if "internlm" in AVAIL_LLM_MODELS:
+    try:
+        from .bridge_internlm import predict_no_ui_long_connection as internlm_noui
+        from .bridge_internlm import predict as internlm_ui
+        model_info.update({
+            "internlm": {
+                "fn_with_ui": internlm_ui,
+                "fn_without_ui": internlm_noui,
+                "endpoint": None,
+                "max_token": 4096,
+                "tokenizer": tokenizer_gpt35,
+                "token_cnt": get_token_num_gpt35,
+            }
+        })
+    except:
+        print(trimmed_format_exc())
 
 def LLM_CATCH_EXCEPTION(f):
     """
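Every entry registered in model_info follows the same shape, so callers can dispatch to the new backend without model-specific branches. A minimal sketch of that lookup (the `route` helper is illustrative, not code from this diff):

```python
def route(llm_model, model_info):
    """Illustrative dispatch over the registry populated above."""
    entry = model_info[llm_model]          # e.g. model_info["internlm"]
    predict_ui = entry["fn_with_ui"]       # streaming predict() used by the UI
    predict_noui = entry["fn_without_ui"]  # blocking variant for plugins/threads
    max_token = entry["max_token"]         # context budget (4096 for internlm)
    return predict_ui, predict_noui, max_token
```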
@@ -141,7 +141,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
     yield from update_ui(chatbot=chatbot, history=history, msg="等待响应") # 刷新界面
 
     try:
-        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream)
+        headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt, stream=False)
     except RuntimeError as e:
         chatbot[-1] = (inputs, f"您提供的api-key不满足要求,不包含任何可用于{llm_kwargs['llm_model']}的api-key。您可能选择了错误的模型或请求源。")
         yield from update_ui(chatbot=chatbot, history=history, msg="api-key不满足要求") # 刷新界面
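With stream=False the payload asks the endpoint for one complete JSON answer instead of an SSE stream. The actual dict is built inside generate_payload and is not reproduced here; the sketch below only illustrates the expected OpenAI-style shape, with placeholder values:

```python
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user",   "content": "你好"},
    ],
    "stream": False,      # whole answer returned in a single JSON body
    "temperature": 1.0,
    "top_p": 1.0,
}
```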
@@ -156,7 +156,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
             from .bridge_all import model_info
             endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
             response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                    json=payload, stream=True, timeout=TIMEOUT_SECONDS);break
+                                    json=payload, stream=False, timeout=TIMEOUT_SECONDS);break
         except:
             retry += 1
             chatbot[-1] = ((chatbot[-1][0], timeout_bot_msg))
@@ -174,9 +174,16 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
             chunk = next(stream_response)
         except StopIteration:
             # 非OpenAI官方接口的出现这样的报错,OpenAI和API2D不会走这里
-            from toolbox import regular_txt_to_markdown; tb_str = '```\n' + trimmed_format_exc() + '```'
-            chatbot[-1] = (chatbot[-1][0], f"[Local Message] 远程返回错误: \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk.decode())}")
-            yield from update_ui(chatbot=chatbot, history=history, msg="远程返回错误:" + chunk.decode()) # 刷新界面
+            chunk_decoded = chunk.decode()
+            if '"finish_reason":"stop"' in chunk_decoded:
+                history[-1] = json.loads(chunk_decoded)['choices'][0]['message']['content']
+                chatbot[-1] = (history[-2], history[-1])
+                yield from update_ui(chatbot=chatbot, history=history, msg='OK') # 刷新界面
+                return
+            else:
+                error_msg = chunk_decoded
+                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
+                yield from update_ui(chatbot=chatbot, history=history, msg="非Openai官方接口返回了错误:" + chunk.decode()) # 刷新界面
             return
 
         # print(chunk.decode()[6:])
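Because the response is no longer streamed, the whole body arrives as one JSON document, which is why the branch above can read choices[0]['message']['content'] directly after spotting finish_reason "stop". A compact, self-contained illustration (the body is abbreviated and assumed to be OpenAI-compatible):

```python
import json

chunk_decoded = '{"choices":[{"index":0,"message":{"role":"assistant","content":"你好!"},"finish_reason":"stop"}]}'
if '"finish_reason":"stop"' in chunk_decoded:
    answer = json.loads(chunk_decoded)['choices'][0]['message']['content']
    print(answer)  # -> 你好!
```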
@@ -187,7 +194,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         if chunk:
             try:
                 chunk_decoded = chunk.decode()
-                # 前者API2D的
+                # 前者是API2D的结束条件,后者是OPENAI的结束条件
                 if ('data: [DONE]' in chunk_decoded) or (len(json.loads(chunk_decoded[6:])['choices'][0]["delta"]) == 0):
                     # 判定为数据流的结束,gpt_replying_buffer也写完了
                     logging.info(f'[response] {gpt_replying_buffer}')
@@ -200,13 +207,18 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
                     history[-1] = gpt_replying_buffer
                     chatbot[-1] = (history[-2], history[-1])
                     yield from update_ui(chatbot=chatbot, history=history, msg=status_text) # 刷新界面
 
             except Exception as e:
-                traceback.print_exc()
                 yield from update_ui(chatbot=chatbot, history=history, msg="Json解析不合常规") # 刷新界面
                 chunk = get_full_error(chunk, stream_response)
                 chunk_decoded = chunk.decode()
                 error_msg = chunk_decoded
+                chatbot, history = handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg)
+                yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
+                print(error_msg)
+                return
+
+def handle_error(inputs, llm_kwargs, chatbot, history, chunk_decoded, error_msg):
+    from .bridge_all import model_info
     openai_website = ' 请登录OpenAI查看详情 https://platform.openai.com/signup'
     if "reduce the length" in error_msg:
         if len(history) >= 2: history[-1] = ""; history[-2] = "" # 清除当前溢出的输入:history[-2] 是本次输入, history[-1] 是本次输出
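The new handle_error helper centralizes the substring matching that previously lived inline in predict: it inspects the provider's error text, clears the chat history where needed, and returns the updated (chatbot, history) pair. Only the "reduce the length" branch is visible in this hunk; the sketch below condenses the general pattern with hypothetical extra branches:

```python
def describe_error(error_msg: str) -> str:
    """Hypothetical reduction of handle_error's matching logic to its core idea."""
    if "reduce the length" in error_msg:
        return "输入(或历史)过长, 已清空本轮溢出的输入"
    if "does not exist" in error_msg:
        return "模型不存在, 或没有访问权限"
    return "未知错误: " + error_msg
```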
@@ -232,8 +244,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         from toolbox import regular_txt_to_markdown
         tb_str = '```\n' + trimmed_format_exc() + '```'
         chatbot[-1] = (chatbot[-1][0], f"[Local Message] 异常 \n\n{tb_str} \n\n{regular_txt_to_markdown(chunk_decoded)}")
-        yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg) # 刷新界面
-        return
+    return chatbot, history
 
 def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     """
request_llm/bridge_internlm.py (new file, 315 lines)

@@ -0,0 +1,315 @@
from transformers import AutoModel, AutoTokenizer
import time
import threading
import importlib
from toolbox import update_ui, get_conf, Singleton
from multiprocessing import Process, Pipe

model_name = "InternLM"
cmd_to_install = "`pip install ???`"
load_message = f"{model_name}尚未加载,加载需要一段时间。注意,取决于`config.py`的配置,{model_name}消耗大量的内存(CPU)或显存(GPU),也许会导致低配计算机卡死 ……"
def try_to_import_special_deps():
    import sentencepiece

user_prompt = "<|User|>:{user}<eoh>\n"
robot_prompt = "<|Bot|>:{robot}<eoa>\n"
cur_query_prompt = "<|User|>:{user}<eoh>\n<|Bot|>:"


def combine_history(prompt, hist):
    messages = hist
    total_prompt = ""
    for message in messages:
        cur_content = message
        cur_prompt = user_prompt.replace("{user}", cur_content[0])
        total_prompt += cur_prompt
        cur_prompt = robot_prompt.replace("{robot}", cur_content[1])
        total_prompt += cur_prompt
    total_prompt = total_prompt + cur_query_prompt.replace("{user}", prompt)
    return total_prompt


@Singleton
class GetInternlmHandle(Process):
    def __init__(self):
        # ⭐主进程执行
        super().__init__(daemon=True)
        self.parent, self.child = Pipe()
        self._model = None
        self._tokenizer = None
        self.info = ""
        self.success = True
        self.check_dependency()
        self.start()
        self.threadLock = threading.Lock()
    def ready(self):
        # ⭐主进程执行
        return self._model is not None

    def load_model_and_tokenizer(self):
        # 🏃♂️🏃♂️🏃♂️ 子进程执行
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        device, = get_conf('LOCAL_MODEL_DEVICE')
        if self._model is None:
            tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True)
            if device=='cpu':
                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16)
            else:
                model = AutoModelForCausalLM.from_pretrained("internlm/internlm-chat-7b", trust_remote_code=True).to(torch.bfloat16).cuda()

            model = model.eval()
        return model, tokenizer

    def llm_stream_generator(self, **kwargs):
        import torch
        import logging
        import copy
        import warnings
        import torch.nn as nn
        from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig

        # 🏃♂️🏃♂️🏃♂️ 子进程执行
        def adaptor():
            model = self._model
            tokenizer = self._tokenizer
            prompt = kwargs['query']
            max_length = kwargs['max_length']
            top_p = kwargs['top_p']
            temperature = kwargs['temperature']
            history = kwargs['history']
            real_prompt = combine_history(prompt, history)
            return model, tokenizer, real_prompt, max_length, top_p, temperature

        model, tokenizer, prompt, max_length, top_p, temperature = adaptor()
        prefix_allowed_tokens_fn = None
        logits_processor = None
        stopping_criteria = None
        additional_eos_token_id = 103028
        generation_config = None
        # 🏃♂️🏃♂️🏃♂️ 子进程执行
        # 🏃♂️🏃♂️🏃♂️ https://github.com/InternLM/InternLM/blob/efbf5335709a8c8faeac6eaf07193973ff1d56a1/web_demo.py#L25

        inputs = tokenizer([prompt], padding=True, return_tensors="pt")
        input_length = len(inputs["input_ids"][0])
        for k, v in inputs.items():
            inputs[k] = v.cuda()
        input_ids = inputs["input_ids"]
        batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
        if generation_config is None:
            generation_config = model.generation_config
        generation_config = copy.deepcopy(generation_config)
        model_kwargs = generation_config.update(**kwargs)
        bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
        if isinstance(eos_token_id, int):
            eos_token_id = [eos_token_id]
        if additional_eos_token_id is not None:
            eos_token_id.append(additional_eos_token_id)
        has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
        if has_default_max_length and generation_config.max_new_tokens is None:
            warnings.warn(
                f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                " recommend using `max_new_tokens` to control the maximum length of the generation.",
                UserWarning,
            )
        elif generation_config.max_new_tokens is not None:
            generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
            if not has_default_max_length:
                logging.warn(
                    f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                    f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                    "Please refer to the documentation for more information. "
                    "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)",
                    UserWarning,
                )

        if input_ids_seq_length >= generation_config.max_length:
            input_ids_string = "input_ids"
            logging.warning(
                f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                " increasing `max_new_tokens`."
            )

        # 2. Set generation parameters if not already defined
        logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
        stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

        logits_processor = model._get_logits_processor(
            generation_config=generation_config,
            input_ids_seq_length=input_ids_seq_length,
            encoder_input_ids=input_ids,
            prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
            logits_processor=logits_processor,
        )

        stopping_criteria = model._get_stopping_criteria(
            generation_config=generation_config, stopping_criteria=stopping_criteria
        )
        logits_warper = model._get_logits_warper(generation_config)

        unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
        scores = None
        while True:
            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
            # forward pass to get next token
            outputs = model(
                **model_inputs,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
            )

            next_token_logits = outputs.logits[:, -1, :]

            # pre-process distribution
            next_token_scores = logits_processor(input_ids, next_token_logits)
            next_token_scores = logits_warper(input_ids, next_token_scores)

            # sample
            probs = nn.functional.softmax(next_token_scores, dim=-1)
            if generation_config.do_sample:
                next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
            else:
                next_tokens = torch.argmax(probs, dim=-1)

            # update generated ids, model inputs, and length for next step
            input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
            model_kwargs = model._update_model_kwargs_for_generation(
                outputs, model_kwargs, is_encoder_decoder=False
            )
            unfinished_sequences = unfinished_sequences.mul((min(next_tokens != i for i in eos_token_id)).long())

            output_token_ids = input_ids[0].cpu().tolist()
            output_token_ids = output_token_ids[input_length:]
            for each_eos_token_id in eos_token_id:
                if output_token_ids[-1] == each_eos_token_id:
                    output_token_ids = output_token_ids[:-1]
            response = tokenizer.decode(output_token_ids)

            yield response
            # stop when each sentence is finished, or if we exceed the maximum length
            if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                return
    def check_dependency(self):
        # 🏃♂️🏃♂️🏃♂️ 子进程执行
        try:
            try_to_import_special_deps()
            self.info = "依赖检测通过"
            self.success = True
        except:
            self.info = f"缺少{model_name}的依赖,如果要使用{model_name},除了基础的pip依赖以外,您还需要运行{cmd_to_install}安装{model_name}的依赖。"
            self.success = False

    def run(self):
        # 🏃♂️🏃♂️🏃♂️ 子进程执行
        # 第一次运行,加载参数
        try:
            self._model, self._tokenizer = self.load_model_and_tokenizer()
        except:
            from toolbox import trimmed_format_exc
            self.child.send(f'[Local Message] 不能正常加载{model_name}的参数.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            raise RuntimeError(f"不能正常加载{model_name}的参数!")

        while True:
            # 进入任务等待状态
            kwargs = self.child.recv()
            # 收到消息,开始请求
            try:
                for response_full in self.llm_stream_generator(**kwargs):
                    self.child.send(response_full)
            except:
                from toolbox import trimmed_format_exc
                self.child.send(f'[Local Message] 调用{model_name}失败.' + '\n```\n' + trimmed_format_exc() + '\n```\n')
            # 请求处理结束,开始下一个循环
            self.child.send('[Finish]')

    def stream_chat(self, **kwargs):
        # ⭐主进程执行
        self.threadLock.acquire()
        self.parent.send(kwargs)
        while True:
            res = self.parent.recv()
            if res != '[Finish]':
                yield res
            else:
                break
        self.threadLock.release()


# ------------------------------------------------------------------------------------------------------------------------
# 🔌💻 GPT-Academic
# ------------------------------------------------------------------------------------------------------------------------
def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=[], console_slience=False):
    """
    ⭐多线程方法
    函数的说明请见 request_llm/bridge_all.py
    """
    _llm_handle = GetInternlmHandle()
    if len(observe_window) >= 1: observe_window[0] = load_message + "\n\n" + _llm_handle.info
    if not _llm_handle.success:
        error = _llm_handle.info
        _llm_handle = None
        raise RuntimeError(error)

    # chatglm 没有 sys_prompt 接口,因此把prompt加入 history
    history_feedin = []
    history_feedin.append(["What can I do?", sys_prompt])
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    watch_dog_patience = 5 # 看门狗 (watchdog) 的耐心, 设置5秒即可
    response = ""
    for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        if len(observe_window) >= 1: observe_window[0] = response
        if len(observe_window) >= 2:
            if (time.time()-observe_window[1]) > watch_dog_patience:
                raise RuntimeError("程序终止。")
    return response


def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_prompt='', stream = True, additional_fn=None):
    """
    ⭐单线程方法
    函数的说明请见 request_llm/bridge_all.py
    """
    chatbot.append((inputs, ""))

    _llm_handle = GetInternlmHandle()
    chatbot[-1] = (inputs, load_message + "\n\n" + _llm_handle.info)
    yield from update_ui(chatbot=chatbot, history=[])
    if not _llm_handle.success:
        _llm_handle = None
        return

    if additional_fn is not None:
        import core_functional
        importlib.reload(core_functional)    # 热更新prompt
        core_functional = core_functional.get_core_functions()
        if "PreProcess" in core_functional[additional_fn]: inputs = core_functional[additional_fn]["PreProcess"](inputs)  # 获取预处理函数(如果有的话)
        inputs = core_functional[additional_fn]["Prefix"] + inputs + core_functional[additional_fn]["Suffix"]

    # 处理历史信息
    history_feedin = []
    history_feedin.append(["What can I do?", system_prompt] )
    for i in range(len(history)//2):
        history_feedin.append([history[2*i], history[2*i+1]] )

    # 开始接收chatglm的回复
    response = f"[Local Message]: 等待{model_name}响应中 ..."
    for response in _llm_handle.stream_chat(query=inputs, history=history_feedin, max_length=llm_kwargs['max_length'], top_p=llm_kwargs['top_p'], temperature=llm_kwargs['temperature']):
        chatbot[-1] = (inputs, response)
        yield from update_ui(chatbot=chatbot, history=history)

    # 总结输出
    if response == f"[Local Message]: 等待{model_name}响应中 ...":
        response = f"[Local Message]: {model_name}响应异常 ..."
    history.extend([inputs, response])
    yield from update_ui(chatbot=chatbot, history=history)
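For orientation, the prompt templates near the top of the new file produce InternLM's chat format: alternating <|User|>/<|Bot|> turns terminated by <eoh>/<eoa>, followed by an open <|Bot|>: slot for the model to complete. A quick illustration using combine_history as defined above (the history entries are sample values):

```python
hist = [["What can I do?", "You are a helpful assistant."]]
print(combine_history("请问什么是质子?", hist))
# <|User|>:What can I do?<eoh>
# <|Bot|>:You are a helpful assistant.<eoa>
# <|User|>:请问什么是质子?<eoh>
# <|Bot|>:
```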
@@ -447,6 +447,15 @@ class _ChatHub:
         """
         Ask a question to the bot
         """
+        req_header = HEADERS
+        if self.cookies is not None:
+            ws_cookies = []
+            for cookie in self.cookies:
+                ws_cookies.append(f"{cookie['name']}={cookie['value']}")
+            req_header.update({
+                'Cookie': ';'.join(ws_cookies),
+            })
+
         timeout = aiohttp.ClientTimeout(total=30)
         self.session = aiohttp.ClientSession(timeout=timeout)
 
@@ -455,7 +464,7 @@ class _ChatHub:
         # Check if websocket is closed
         self.wss = await self.session.ws_connect(
             wss_link,
-            headers=HEADERS,
+            headers=req_header,
             ssl=ssl_context,
             proxy=self.proxy,
             autoping=False,
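The cookie handling added above flattens browser-exported cookies (a list of name/value dicts) into a single Cookie header before the websocket handshake, which is what lets an existing logged-in Bing session be reused. A standalone illustration with placeholder values:

```python
cookies = [{"name": "_U", "value": "xxxx"}, {"name": "MUID", "value": "yyyy"}]  # placeholders
ws_cookies = [f"{c['name']}={c['value']}" for c in cookies]
req_header = {"Cookie": ";".join(ws_cookies)}
print(req_header)  # {'Cookie': '_U=xxxx;MUID=yyyy'}
```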
@@ -14,7 +14,8 @@ if __name__ == "__main__":
     # from request_llm.bridge_moss import predict_no_ui_long_connection
     # from request_llm.bridge_jittorllms_pangualpha import predict_no_ui_long_connection
     # from request_llm.bridge_jittorllms_llama import predict_no_ui_long_connection
-    from request_llm.bridge_claude import predict_no_ui_long_connection
+    # from request_llm.bridge_claude import predict_no_ui_long_connection
+    from request_llm.bridge_internlm import predict_no_ui_long_connection
 
     llm_kwargs = {
         'max_length': 512,
@@ -22,45 +23,8 @@ if __name__ == "__main__":
         'temperature': 1,
     }
 
-    result = predict_no_ui_long_connection(inputs="你好",
+    result = predict_no_ui_long_connection( inputs="请问什么是质子?",
                                             llm_kwargs=llm_kwargs,
-                                            history=[],
+                                            history=["你好", "我好!"],
                                             sys_prompt="")
     print('final result:', result)
-
-
-    # # print(result)
-    # from multiprocessing import Process, Pipe
-    # class GetGLMHandle(Process):
-    #     def __init__(self):
-    #         super().__init__(daemon=True)
-    #         pass
-    #     def run(self):
-    #         # 子进程执行
-    #         # 第一次运行,加载参数
-    #         def validate_path():
-    #             import os, sys
-    #             dir_name = os.path.dirname(__file__)
-    #             root_dir_assume = os.path.abspath(os.path.dirname(__file__) + '/..')
-    #             os.chdir(root_dir_assume + '/request_llm/jittorllms')
-    #             sys.path.append(root_dir_assume + '/request_llm/jittorllms')
-    #         validate_path() # validate path so you can run from base directory
-    #         jittorllms_model = None
-    #         import types
-    #         try:
-    #             if jittorllms_model is None:
-    #                 from models import get_model
-    #                 # availabel_models = ["chatglm", "pangualpha", "llama", "chatrwkv"]
-    #                 args_dict = {'model': 'chatrwkv'}
-    #                 print('self.jittorllms_model = get_model(types.SimpleNamespace(**args_dict))')
-    #                 jittorllms_model = get_model(types.SimpleNamespace(**args_dict))
-    #                 print('done get model')
-    #         except:
-    #             # self.child.send('[Local Message] Call jittorllms fail 不能正常加载jittorllms的参数。')
-    #             raise RuntimeError("不能正常加载jittorllms的参数!")
-    # x = GetGLMHandle()
-    # x.start()
-
-
-    # input()
toolbox.py (12 changes)

@@ -884,3 +884,15 @@ def objload(file='objdump.tmp'):
     with open(file, 'rb') as f:
         return pickle.load(f)
 
+def Singleton(cls):
+    """
+    一个单实例装饰器
+    """
+    _instance = {}
+
+    def _singleton(*args, **kargs):
+        if cls not in _instance:
+            _instance[cls] = cls(*args, **kargs)
+        return _instance[cls]
+
+    return _singleton
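The new Singleton decorator caches one instance per class, which is how GetInternlmHandle above keeps a single model-hosting child process per session instead of reloading the weights on every call. Usage sketch:

```python
@Singleton
class Handle:
    def __init__(self):
        print("loaded once")

a, b = Handle(), Handle()   # prints "loaded once" a single time
assert a is b
```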