Merge pull request #841 from KelvinF97/master

Optimize some code and fix some bugs
This commit is contained in:
binary-husky
2023-07-01 22:31:28 +08:00
committed by GitHub
6 changed files with 241 additions and 172 deletions

View File

@@ -1,10 +1,10 @@
-def check_proxy(proxies):
+def check_proxy(proxies: dict):
     import requests
-    proxies_https = proxies['https'] if proxies is not None else ''
+    proxies_https = proxies.get('https') if proxies is not None else ''
     try:
         response = requests.get("https://ipapi.co/json/",
-                                proxies=proxies, timeout=4)
+                                proxies=proxies, timeout=30)
         data = response.json()
         print(f'查询代理的地理位置,返回的结果是{data}')
         if 'country_name' in data:
@@ -16,8 +16,8 @@ def check_proxy(proxies):
         result = f"代理配置 {proxies_https}, 代理数据解析失败:{data}"
         print(result)
         return result
-    except:
-        result = f"代理配置 {proxies_https}, 代理所在地查询超时,代理可能无效"
+    except Exception as e:
+        result = f"代理 {proxies_https} 查询出现异常: {e},代理可能无效"
        print(result)
        return result
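A minimal usage sketch of the revised check_proxy, assuming the function above is in scope; the proxy address is a placeholder, not a value taken from this diff:

# Hypothetical illustration only: exercising check_proxy() with a placeholder proxy.
proxies = {
    "http":  "socks5h://localhost:10808",   # placeholder address
    "https": "socks5h://localhost:10808",   # placeholder address
}
print(check_proxy(proxies))  # geo-lookup result, or the new exception message on failure
print(check_proxy(None))     # with None, proxies_https falls back to '' and the request goes direct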

View File

@@ -1,16 +1,19 @@
 from toolbox import update_ui, get_conf, trimmed_format_exc
 import threading
 def input_clipping(inputs, history, max_token_limit):
     import numpy as np
     from request_llm.bridge_all import model_info
     enc = model_info["gpt-3.5-turbo"]['tokenizer']
-    def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
+    def get_token_num(txt):
+        return len(enc.encode(txt, disallowed_special=()))
     mode = 'input-and-history'
     # 当 输入部分的token占比 小于 全文的一半时,只裁剪历史
     input_token_num = get_token_num(inputs)
-    if input_token_num < max_token_limit//2:
+    if input_token_num < max_token_limit // 2:
         mode = 'only-history'
         max_token_limit = max_token_limit - input_token_num
@@ -23,7 +26,7 @@ def input_clipping(inputs, history, max_token_limit):
     while n_token > max_token_limit:
         where = np.argmax(everything_token)
         encoded = enc.encode(everything[where], disallowed_special=())
-        clipped_encoded = encoded[:len(encoded)-delta]
+        clipped_encoded = encoded[:len(encoded) - delta]
         everything[where] = enc.decode(clipped_encoded)[:-1]  # -1 to remove the may-be illegal char
         everything_token[where] = get_token_num(everything[where])
         n_token = get_token_num('\n'.join(everything))
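The loop above greedily trims whichever segment currently holds the most tokens until the total fits the limit. A standalone sketch of the same idea, with plain character counts standing in for the tiktoken encoder (names invented for illustration):

# Illustration of the trimming strategy, with len() standing in for the tokenizer.
def clip_to_limit(segments, max_len, delta=16):
    sizes = [len(s) for s in segments]
    while sum(sizes) > max_len:
        where = max(range(len(segments)), key=sizes.__getitem__)  # pick the longest segment
        segments[where] = segments[where][:-delta]                # chop a granule off its tail
        sizes[where] = len(segments[where])
    return segments

print(clip_to_limit(["short", "a" * 500, "medium" * 10], max_len=100))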
@@ -35,12 +38,13 @@ def input_clipping(inputs, history, max_token_limit):
         history = everything[1:]
     return inputs, history
 def request_gpt_model_in_new_thread_with_ui_alive(
         inputs, inputs_show_user, llm_kwargs,
         chatbot, history, sys_prompt, refresh_interval=0.2,
         handle_token_exceed=True,
         retry_times_at_unknown_error=2,
         ):
     """
     Request GPT model请求GPT模型同时维持用户界面活跃。
@@ -67,12 +71,13 @@ def request_gpt_model_in_new_thread_with_ui_alive(
     yield from update_ui(chatbot=chatbot, history=[])  # 刷新界面
     executor = ThreadPoolExecutor(max_workers=16)
     mutable = ["", time.time(), ""]
     def _req_gpt(inputs, history, sys_prompt):
         retry_op = retry_times_at_unknown_error
         exceeded_cnt = 0
         while True:
             # watchdog error
-            if len(mutable) >= 2 and (time.time()-mutable[1]) > 5:
+            if len(mutable) >= 2 and (time.time() - mutable[1]) > 5:
                 raise RuntimeError("检测到程序终止。")
             try:
                 # 【第一种情况】:顺利完成
@@ -89,7 +94,7 @@ def request_gpt_model_in_new_thread_with_ui_alive(
                     p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                     MAX_TOKEN = 4096
                     EXCEED_ALLO = 512 + 512 * exceeded_cnt
-                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
                     mutable[0] += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
                     continue  # 返回重试
                 else:
@@ -104,7 +109,8 @@ def request_gpt_model_in_new_thread_with_ui_alive(
                 mutable[0] += f"[Local Message] 警告,在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
                 if retry_op > 0:
                     retry_op -= 1
-                    mutable[0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}\n\n"
+                    mutable[
+                        0] += f"[Local Message] 重试中,请稍等 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}\n\n"
                     if ("Rate limit reached" in tb_str) or ("Too Many Requests" in tb_str):
                         time.sleep(30)
                     time.sleep(5)
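The watchdog in _req_gpt works by having the UI thread refresh a shared timestamp (mutable[1]) on every repaint; the worker raises once that heartbeat goes stale. A minimal sketch of the pattern with invented names, not the repo's API:

# Hypothetical sketch of the watchdog idea: the worker dies if the UI stops feeding it.
import threading, time

def worker(shared):                      # shared = [partial_output, last_heartbeat]
    while True:
        if time.time() - shared[1] > 5:  # heartbeat older than 5 s -> UI loop is gone
            raise RuntimeError("watchdog: UI stopped refreshing, aborting worker")
        shared[0] += "."
        time.sleep(0.5)

shared = ["", time.time()]
t = threading.Thread(target=worker, args=(shared,))
t.start()
for _ in range(3):           # the "UI" loop: repaint and refresh the heartbeat
    shared[1] = time.time()
    time.sleep(1)
t.join()                     # after the heartbeat stops, the worker raises and the thread exits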
@@ -137,7 +143,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         refresh_interval=0.2, max_workers=-1, scroller_max_len=30,
         handle_token_exceed=True, show_user_at_complete=False,
         retry_times_at_unknown_error=2,
         ):
     """
     Request GPT model using multiple threads with UI and high efficiency
     请求GPT模型的[多线程]版。
@@ -171,8 +177,10 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
     assert len(inputs_array) == len(history_array)
     assert len(inputs_array) == len(sys_prompt_array)
     if max_workers == -1:  # 读取配置文件
-        try: max_workers, = get_conf('DEFAULT_WORKER_NUM')
-        except: max_workers = 8
+        try:
+            max_workers, = get_conf('DEFAULT_WORKER_NUM')
+        except:
+            max_workers = 8
     if max_workers <= 0: max_workers = 3
     # 屏蔽掉 chatglm的多线程可能会导致严重卡顿
     if not (llm_kwargs['llm_model'].startswith('gpt-') or llm_kwargs['llm_model'].startswith('api2d-')):
@@ -194,7 +202,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
         mutable[index][2] = "执行中"
         while True:
             # watchdog error
-            if len(mutable[index]) >= 2 and (time.time()-mutable[index][1]) > 5:
+            if len(mutable[index]) >= 2 and (time.time() - mutable[index][1]) > 5:
                 raise RuntimeError("检测到程序终止。")
             try:
                 # 【第一种情况】:顺利完成
@@ -214,7 +222,7 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     p_ratio, n_exceed = get_reduce_token_percent(str(token_exceeded_error))
                     MAX_TOKEN = 4096
                     EXCEED_ALLO = 512 + 512 * exceeded_cnt
-                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN-EXCEED_ALLO)
+                    inputs, history = input_clipping(inputs, history, max_token_limit=MAX_TOKEN - EXCEED_ALLO)
                     gpt_say += f'[Local Message] 警告文本过长将进行截断Token溢出数{n_exceed}\n\n'
                     mutable[index][2] = f"截断重试"
                     continue  # 返回重试
@@ -222,15 +230,17 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     # 【选择放弃】
                     tb_str = '```\n' + trimmed_format_exc() + '```'
                     gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
-                    if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                    if len(mutable[index][0]) > 0:
+                        gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                     mutable[index][2] = "输入过长已放弃"
                     return gpt_say  # 放弃
-            except:
+            except Exception as e:
                 # 【第三种情况】:其他错误
                 tb_str = '```\n' + trimmed_format_exc() + '```'
-                print(tb_str)
+                print(f"发生异常:{e}, 调用栈信息:{tb_str}")
                 gpt_say += f"[Local Message] 警告,线程{index}在执行过程中遭遇问题, Traceback\n\n{tb_str}\n\n"
-                if len(mutable[index][0]) > 0: gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
+                if len(mutable[index][0]) > 0:
+                    gpt_say += "此线程失败前收到的回答:\n\n" + mutable[index][0]
                 if retry_op > 0:
                     retry_op -= 1
                     wait = random.randint(5, 20)
@@ -241,9 +251,11 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                         fail_info = ""
                     # 也许等待十几秒后,情况会好转
                     for i in range(wait):
-                        mutable[index][2] = f"{fail_info}等待重试 {wait-i}"; time.sleep(1)
+                        mutable[index][2] = f"{fail_info}等待重试 {wait - i}";
+                        time.sleep(1)
                     # 开始重试
-                    mutable[index][2] = f"重试中 {retry_times_at_unknown_error-retry_op}/{retry_times_at_unknown_error}"
+                    mutable[index][
+                        2] = f"重试中 {retry_times_at_unknown_error - retry_op}/{retry_times_at_unknown_error}"
                     continue  # 返回重试
                 else:
                     mutable[index][2] = "已失败"
@@ -252,7 +264,8 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
                     return gpt_say  # 放弃
     # 异步任务开始
-    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in zip(
+    futures = [executor.submit(_req_gpt, index, inputs, history, sys_prompt) for index, inputs, history, sys_prompt in
+               zip(
         range(len(inputs_array)), inputs_array, history_array, sys_prompt_array)]
     cnt = 0
     while True:
@@ -267,16 +280,16 @@ def request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
             mutable[thread_index][1] = time.time()
         # 在前端打印些好玩的东西
         for thread_index, _ in enumerate(worker_done):
-            print_something_really_funny = "[ ...`"+mutable[thread_index][0][-scroller_max_len:].\
+            print_something_really_funny = "[ ...`" + mutable[thread_index][0][-scroller_max_len:]. \
                 replace('\n', '').replace('```', '...').replace(
-                    ' ', '.').replace('<br/>', '.....').replace('$', '.')+"`... ]"
+                    ' ', '.').replace('<br/>', '.....').replace('$', '.') + "`... ]"
             observe_win.append(print_something_really_funny)
         # 在前端打印些好玩的东西
         stat_str = ''.join([f'`{mutable[thread_index][2]}`: {obs}\n\n'
                             if not done else f'`{mutable[thread_index][2]}`\n\n'
                             for thread_index, done, obs in zip(range(len(worker_done)), worker_done, observe_win)])
         # 在前端打印些好玩的东西
-        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.']*(cnt % 10+1))]
+        chatbot[-1] = [chatbot[-1][0], f'多线程操作已经开始,完成情况: \n\n{stat_str}' + ''.join(['.'] * (cnt % 10 + 1))]
         yield from update_ui(chatbot=chatbot, history=[])  # 刷新界面
         if all(worker_done):
             executor.shutdown()
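The surrounding loop polls every future, feeds the per-thread watchdog, and rebuilds a status string for the chat window on each pass. A compact, self-contained sketch of that polling pattern (names invented for illustration):

# Hypothetical sketch: poll a pool of futures and render a one-line status summary.
import time
from concurrent.futures import ThreadPoolExecutor

def job(i):
    time.sleep(i)
    return f"result {i}"

executor = ThreadPoolExecutor(max_workers=4)
futures = [executor.submit(job, i) for i in range(4)]
while True:
    done_flags = [f.done() for f in futures]
    print("status:", " ".join("done" if d else "busy" for d in done_flags))
    if all(done_flags):
        break
    time.sleep(0.5)
print([f.result() for f in futures])
executor.shutdown()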
@@ -306,6 +319,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
             lines = txt_tocut.split('\n')
             estimated_line_cut = limit / get_token_fn(txt_tocut) * len(lines)
             estimated_line_cut = int(estimated_line_cut)
+            cnt = 0
             for cnt in reversed(range(estimated_line_cut)):
                 if must_break_at_empty_line:
                     if lines[cnt] != "":
@@ -322,6 +336,7 @@ def breakdown_txt_to_satisfy_token_limit(txt, get_token_fn, limit):
             result = [prev]
             result.extend(cut(post, must_break_at_empty_line))
             return result
     try:
         return cut(txt, must_break_at_empty_line=True)
     except RuntimeError:
@@ -337,6 +352,7 @@ def force_breakdown(txt, limit, get_token_fn):
             return txt[:i], txt[i:]
     return "Tiktoken未知错误", "Tiktoken未知错误"
 def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
     # 递归
     def cut(txt_tocut, must_break_at_empty_line, break_anyway=False):
@@ -365,6 +381,7 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
             result = [prev]
             result.extend(cut(post, must_break_at_empty_line, break_anyway=break_anyway))
             return result
     try:
         # 第1次尝试将双空行\n\n作为切分点
         return cut(txt, must_break_at_empty_line=True)
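breakdown_txt_to_satisfy_token_limit recursively splits the text at a line boundary chosen so that the head stays under the limit, then recurses on the tail; it first insists on breaking at empty lines and falls back to any line break. A simplified, self-contained sketch of that idea, with character counts standing in for get_token_fn:

# Illustration of the recursive head/tail split; not the repo implementation.
def breakdown(text, limit, measure=len):
    if measure(text) <= limit:
        return [text]
    lines = text.split('\n')
    # walk backwards until the head fits under the limit
    for cut_at in reversed(range(1, len(lines))):
        head = '\n'.join(lines[:cut_at])
        if measure(head) < limit:
            tail = '\n'.join(lines[cut_at:])
            return [head] + breakdown(tail, limit, measure)
    raise RuntimeError("a single line exceeds the limit")

chunks = breakdown("one\ntwo\nthree\nfour\nfive", limit=10)
print(chunks)  # ['one\ntwo', 'three', 'four\nfive'] -- every chunk measures under 10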
@@ -387,7 +404,6 @@ def breakdown_txt_to_satisfy_token_limit_for_pdf(txt, get_token_fn, limit):
         return cut(txt, must_break_at_empty_line=False, break_anyway=True)
 def read_and_clean_pdf_text(fp):
     """
     这个函数用于分割pdf用了很多trick逻辑较乱效果奇好
@@ -417,6 +433,7 @@ def read_and_clean_pdf_text(fp):
     fb = 2  # Index 2 框框
     REMOVE_FOOT_NOTE = True  # 是否丢弃掉 不是正文的内容 (比正文字体小,如参考文献、脚注、图注等)
     REMOVE_FOOT_FFSIZE_PERCENT = 0.95  # 小于正文的判定为不是正文有些文章的正文部分字体大小不是100%统一的,有肉眼不可见的小变化)
     def primary_ffsize(l):
         """
         提取文本块主字体
@@ -427,11 +444,11 @@ def read_and_clean_pdf_text(fp):
             fsize_statiscs[wtf['size']] += len(wtf['text'])
         return max(fsize_statiscs, key=fsize_statiscs.get)
-    def ffsize_same(a,b):
+    def ffsize_same(a, b):
         """
         提取字体大小是否近似相等
         """
-        return abs((a-b)/max(a,b)) < 0.02
+        return abs((a - b) / max(a, b)) < 0.02
     with fitz.open(fp) as doc:
         meta_txt = []
@@ -454,7 +471,8 @@ def read_and_clean_pdf_text(fp):
                         for wtf in l['spans']:  # for l in t['lines']:
                             meta_span.append([wtf['text'], wtf['size'], len(wtf['text'])])
                     # meta_line.append(["NEW_BLOCK", pf])
-            # 块元提取 for each word segment with in line for each line cross-line words for each block
+            # 块元提取 for each word segment with in line for each line
+            # cross-line words for each block
             meta_txt.extend([" ".join(["".join([wtf['text'] for wtf in l['spans']]) for l in t['lines']]).replace(
                 '- ', '') for t in text_areas['blocks'] if 'lines' in t])
             meta_font.extend([np.mean([np.mean([wtf['size'] for wtf in l['spans']])
@@ -482,18 +500,19 @@ def read_and_clean_pdf_text(fp):
         if REMOVE_FOOT_NOTE:
             if meta_line[index][fs] <= give_up_fize_threshold:
                 continue
-        if ffsize_same(meta_line[index][fs], meta_line[index-1][fs]):
+        if ffsize_same(meta_line[index][fs], meta_line[index - 1][fs]):
             # 尝试识别段落
-            if meta_line[index][fc].endswith('.') and\
-                    (meta_line[index-1][fc] != 'NEW_BLOCK') and \
-                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (meta_line[index-1][fb][2] - meta_line[index-1][fb][0]) * 0.7:
+            if meta_line[index][fc].endswith('.') and \
+                    (meta_line[index - 1][fc] != 'NEW_BLOCK') and \
+                    (meta_line[index][fb][2] - meta_line[index][fb][0]) < (
+                    meta_line[index - 1][fb][2] - meta_line[index - 1][fb][0]) * 0.7:
                 sec[-1] += line[fc]
                 sec[-1] += "\n\n"
             else:
                 sec[-1] += " "
                 sec[-1] += line[fc]
         else:
-            if (index+1 < len(meta_line)) and \
+            if (index + 1 < len(meta_line)) and \
                     meta_line[index][fs] > main_fsize:
                 # 单行 + 字体大
                 mega_sec.append(copy.deepcopy(sec))
@@ -501,7 +520,7 @@ def read_and_clean_pdf_text(fp):
                 sec.append("# " + line[fc])
             else:
                 # 尝试识别section
-                if meta_line[index-1][fs] > meta_line[index][fs]:
+                if meta_line[index - 1][fs] > meta_line[index][fs]:
                     sec.append("\n" + line[fc])
                 else:
                     sec.append(line[fc])
@@ -520,13 +539,15 @@ def read_and_clean_pdf_text(fp):
             if len(block_txt) < 100:
                 meta_txt[index] = '\n'
         return meta_txt
     meta_txt = 把字符太少的块清除为回车(meta_txt)
     def 清理多余的空行(meta_txt):
         for index in reversed(range(1, len(meta_txt))):
-            if meta_txt[index] == '\n' and meta_txt[index-1] == '\n':
+            if meta_txt[index] == '\n' and meta_txt[index - 1] == '\n':
                 meta_txt.pop(index)
         return meta_txt
     meta_txt = 清理多余的空行(meta_txt)
     def 合并小写开头的段落块(meta_txt):
@@ -537,16 +558,18 @@ def read_and_clean_pdf_text(fp):
                 return True
             else:
                 return False
         for _ in range(100):
             for index, block_txt in enumerate(meta_txt):
                 if starts_with_lowercase_word(block_txt):
-                    if meta_txt[index-1] != '\n':
-                        meta_txt[index-1] += ' '
+                    if meta_txt[index - 1] != '\n':
+                        meta_txt[index - 1] += ' '
                     else:
-                        meta_txt[index-1] = ''
-                    meta_txt[index-1] += meta_txt[index]
+                        meta_txt[index - 1] = ''
+                    meta_txt[index - 1] += meta_txt[index]
                     meta_txt[index] = '\n'
         return meta_txt
     meta_txt = 合并小写开头的段落块(meta_txt)
     meta_txt = 清理多余的空行(meta_txt)
@@ -588,9 +611,10 @@ def get_files_from_everything(txt, type):  # type='.md'
         from toolbox import get_conf
         proxies, = get_conf('proxies')
         r = requests.get(txt, proxies=proxies)
-        with open('./gpt_log/temp'+type, 'wb+') as f: f.write(r.content)
+        with open('./gpt_log/temp' + type, 'wb+') as f:
+            f.write(r.content)
         project_folder = './gpt_log/'
-        file_manifest = ['./gpt_log/temp'+type]
+        file_manifest = ['./gpt_log/temp' + type]
     elif txt.endswith(type):
         # 直接给定文件
         file_manifest = [txt]
@@ -598,7 +622,7 @@ def get_files_from_everything(txt, type):  # type='.md'
     elif os.path.exists(txt):
         # 本地路径,递归搜索
         project_folder = txt
-        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*'+type, recursive=True)]
+        file_manifest = [f for f in glob.glob(f'{project_folder}/**/*' + type, recursive=True)]
         if len(file_manifest) == 0:
             success = False
     else:
@@ -609,8 +633,6 @@ def get_files_from_everything(txt, type):  # type='.md'
     return success, file_manifest, project_folder
 def Singleton(cls):
     _instance = {}
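Only the first two lines of the Singleton decorator appear in this hunk. For orientation, the usual shape of such a class decorator looks roughly like the following generic sketch (not necessarily identical to the repo's version):

# Generic singleton-decorator pattern, shown for illustration.
def Singleton(cls):
    _instance = {}

    def _singleton(*args, **kwargs):
        if cls not in _instance:               # build the one and only instance lazily
            _instance[cls] = cls(*args, **kwargs)
        return _instance[cls]

    return _singleton

@Singleton
class Config:
    pass

assert Config() is Config()  # both calls return the same object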
@@ -642,7 +664,6 @@ class knowledge_archive_interface():
         return self.text2vec_large_chinese
     def feed_archive(self, file_manifest, id="default"):
         self.threadLock.acquire()
         # import uuid
@@ -655,7 +676,7 @@ class knowledge_archive_interface():
             history=[],
             one_conent="",
             one_content_segmentation="",
-            text2vec = self.get_chinese_text2vec(),
+            text2vec=self.get_chinese_text2vec(),
         )
         self.threadLock.release()
@@ -677,23 +698,24 @@ class knowledge_archive_interface():
             history=[],
             one_conent="",
             one_content_segmentation="",
-            text2vec = self.get_chinese_text2vec(),
+            text2vec=self.get_chinese_text2vec(),
         )
         VECTOR_SEARCH_SCORE_THRESHOLD = 0
         VECTOR_SEARCH_TOP_K = 4
         CHUNK_SIZE = 512
         resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
-            query = txt,
-            vs_path = self.kai_path,
+            query=txt,
+            vs_path=self.kai_path,
             score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
             vector_search_top_k=VECTOR_SEARCH_TOP_K,
             chunk_conent=True,
             chunk_size=CHUNK_SIZE,
-            text2vec = self.get_chinese_text2vec(),
+            text2vec=self.get_chinese_text2vec(),
         )
         self.threadLock.release()
         return resp, prompt
 def try_install_deps(deps):
     for dep in deps:
         import subprocess, sys
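try_install_deps is cut off after its first two lines here. A plausible completion of the idea, installing each dependency through pip with the running interpreter; this is a sketch under that assumption, not necessarily the exact body in the repo:

# Sketch: install each missing dependency with the current interpreter's pip.
def try_install_deps(deps):
    for dep in deps:
        import subprocess, sys
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', dep])

# try_install_deps(['zh_langchain'])  # example call; the package name matches the requirements change above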

View File

@@ -1,17 +1,19 @@
-from toolbox import update_ui
 from toolbox import CatchException, report_execption, write_results_to_file
+from toolbox import update_ui
 from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
 fast_debug = False
 def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
-    import time, glob, os
+    import time
+    import os
     print('begin analysis on:', file_manifest)
     for index, fp in enumerate(file_manifest):
         with open(fp, 'r', encoding='utf-8', errors='replace') as f:
             file_content = f.read()
-        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index==0 else ""
+        prefix = "接下来请你逐文件分析下面的论文文件,概括其内容" if index == 0 else ""
         i_say = prefix + f'请对下面的文章片段用中文做一个概述,文件名是{os.path.relpath(fp, project_folder)},文章内容是 ```{file_content}```'
         i_say_show_user = prefix + f'[{index}/{len(file_manifest)}] 请对下面的文章片段做一个概述: {os.path.abspath(fp)}'
         chatbot.append((i_say_show_user, "[Local Message] waiting gpt response."))
@@ -20,10 +22,13 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
         if not fast_debug:
             msg = '正常'
             # ** gpt request **
-            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs, chatbot, history=[], sys_prompt=system_prompt)  # 带超时倒计时
+            gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say_show_user, llm_kwargs,
+                                                                               chatbot, history=[],
+                                                                               sys_prompt=system_prompt)  # 带超时倒计时
             chatbot[-1] = (i_say_show_user, gpt_say)
-            history.append(i_say_show_user); history.append(gpt_say)
+            history.append(i_say_show_user);
+            history.append(gpt_say)
             yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # 刷新界面
             if not fast_debug: time.sleep(2)
@@ -35,33 +40,39 @@ def 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbo
     if not fast_debug:
         msg = '正常'
         # ** gpt request **
-        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot, history=history, sys_prompt=system_prompt)  # 带超时倒计时
+        gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(i_say, i_say, llm_kwargs, chatbot,
+                                                                           history=history,
+                                                                           sys_prompt=system_prompt)  # 带超时倒计时
         chatbot[-1] = (i_say, gpt_say)
-        history.append(i_say); history.append(gpt_say)
+        history.append(i_say)
+        history.append(gpt_say)
         yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # 刷新界面
         res = write_results_to_file(history)
         chatbot.append(("完成了吗?", res))
         yield from update_ui(chatbot=chatbot, history=history, msg=msg)  # 刷新界面
 @CatchException
-def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
-    history = []  # 清空历史,以免输入溢出
-    import glob, os
+def 读文章写摘要(txt, llm_kwargs, plugin_kwargs, chatbot, system_prompt, web_port, history=None):
+    # history = []  # 清空历史,以免输入溢出
+    if history is None:
+        history = []  # 清空历史,以免输入溢出
+    import glob
+    import os
     if os.path.exists(txt):
         project_folder = txt
     else:
-        if txt == "": txt = '空空如也的输入栏'
-        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到本地项目或无权访问: {txt}")
+        if txt == "":
+            txt = '空空如也的输入栏'
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
         yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
         return
     file_manifest = [f for f in glob.glob(f'{project_folder}/**/*.tex', recursive=True)]  # + \
     # [f for f in glob.glob(f'{project_folder}/**/*.cpp', recursive=True)] + \
     # [f for f in glob.glob(f'{project_folder}/**/*.c', recursive=True)]
     if len(file_manifest) == 0:
-        report_execption(chatbot, history, a = f"解析项目: {txt}", b = f"找不到任何.tex文件: {txt}")
+        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
         yield from update_ui(chatbot=chatbot, history=history)  # 刷新界面
         return
     yield from 解析Paper(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt)
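Several functions in this commit replace a mutable default (history=[]) with history=None plus an explicit guard. A short generic illustration of why that idiom matters (not code from this repo):

# Mutable default arguments are shared across calls; None plus a guard avoids that.
def bad(item, history=[]):      # the same list object is reused on every call
    history.append(item)
    return history

def good(item, history=None):   # a fresh list is created when none is supplied
    if history is None:
        history = []
    history.append(item)
    return history

print(bad(1), bad(2))    # [1, 2] [1, 2]  -- state leaks between calls
print(good(1), good(2))  # [1] [2]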

View File

@@ -28,6 +28,7 @@ proxies, API_KEY, TIMEOUT_SECONDS, MAX_RETRY = \
 timeout_bot_msg = '[Local Message] Request timeout. Network error. Please check proxy settings in config.py.' + \
                   '网络错误,检查代理服务器是否可用,以及代理设置的格式是否正确,格式须是[协议]://[地址]:[端口],缺一不可。'
 def get_full_error(chunk, stream_response):
     """
     获取完整的从Openai返回的报错
@@ -40,7 +41,9 @@ def get_full_error(chunk, stream_response):
     return chunk
-def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="", observe_window=None, console_slience=False):
+def predict_no_ui_long_connection(
+        inputs, llm_kwargs, history=None, sys_prompt="", observe_window=None, console_slience=False
+):
     """
     发送至chatGPT等待回复一次性完成不显示中间过程。但内部用stream的方法避免中途网线被掐。
     inputs
@@ -54,45 +57,59 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
     observe_window = None
     用于负责跨越线程传递已经输出的部分大部分时候仅仅为了fancy的视觉效果留空即可。observe_window[0]观测窗。observe_window[1]:看门狗
     """
+    if history is None:
+        history = []
     watch_dog_patience = 5  # 看门狗的耐心, 设置5秒即可
     headers, payload = generate_payload(inputs, llm_kwargs, history, system_prompt=sys_prompt, stream=True)
     retry = 0
+    from bridge_all import model_info
     while True:
         try:
             # make a POST request to the API endpoint, stream=False
-            from .bridge_all import model_info
             endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
             response = requests.post(endpoint, headers=headers, proxies=proxies,
-                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS); break
-        except requests.exceptions.ReadTimeout as e:
+                                     json=payload, stream=True, timeout=TIMEOUT_SECONDS)
+            stream_response = response.iter_lines()
+            break
+        except (requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError):
             retry += 1
             traceback.print_exc()
-            if retry > MAX_RETRY: raise TimeoutError
-            if MAX_RETRY!=0: print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
+            if retry > MAX_RETRY:
+                raise TimeoutError
+            if MAX_RETRY != 0:
+                print(f'请求超时,正在重试 ({retry}/{MAX_RETRY}) ……')
+        except Exception as e:
+            print(f"出现异常:{e}")
+            raise e
-    stream_response = response.iter_lines()
     result = ''
     while True:
-        try: chunk = next(stream_response).decode()
+        try:
+            chunk = next(stream_response).decode()
         except StopIteration:
             break
-        except requests.exceptions.ConnectionError:
-            chunk = next(stream_response).decode()  # 失败了,重试一次?再失败就没办法了。
+        # except requests.exceptions.ConnectionError:
+        #     chunk = next(stream_response).decode()  # 失败了,重试一次?再失败就没办法了。
-        if len(chunk)==0: continue
+        if len(chunk) == 0:
+            continue
         if not chunk.startswith('data:'):
             error_msg = get_full_error(chunk.encode('utf8'), stream_response).decode()
             if "reduce the length" in error_msg:
                 raise ConnectionAbortedError("OpenAI拒绝了请求:" + error_msg)
             else:
                 raise RuntimeError("OpenAI拒绝了请求" + error_msg)
-        if ('data: [DONE]' in chunk): break  # api2d 正常完成
+        if 'data: [DONE]' in chunk:
+            break  # api2d 正常完成
         json_data = json.loads(chunk.lstrip('data:'))['choices'][0]
         delta = json_data["delta"]
-        if len(delta) == 0: break
-        if "role" in delta: continue
+        if len(delta) == 0:
+            break
+        if "role" in delta:
+            continue
         if "content" in delta:
             result += delta["content"]
-            if not console_slience: print(delta["content"], end='')
+            if not console_slience:
+                print(delta["content"], end='')
             if observe_window is not None:
                 # 观测窗,把已经获取的数据显示出去
                 if len(observe_window) >= 1: observe_window[0] += delta["content"]
@@ -100,7 +117,8 @@ def predict_no_ui_long_connection(inputs, llm_kwargs, history=[], sys_prompt="",
                 if len(observe_window) >= 2:
                     if (time.time()-observe_window[1]) > watch_dog_patience:
                         raise RuntimeError("用户取消了程序。")
-        else: raise RuntimeError("意外Json结构"+delta)
+        else:
+            raise RuntimeError("意外Json结构"+delta)
         if json_data['finish_reason'] == 'length':
             raise ConnectionAbortedError("正常结束但显示Token不足导致输出不完整请削减单次输入的文本量。")
     return result
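The loop above consumes an OpenAI-style Server-Sent-Events stream: each line starts with data:, carries a JSON delta, and data: [DONE] marks the end. A self-contained sketch of that parsing logic over canned lines (no network involved):

# Illustration: parsing OpenAI-style streaming lines into a growing result string.
import json

fake_stream = [
    'data: {"choices": [{"delta": {"role": "assistant"}, "finish_reason": null}]}',
    'data: {"choices": [{"delta": {"content": "Hello"}, "finish_reason": null}]}',
    'data: {"choices": [{"delta": {"content": ", world"}, "finish_reason": null}]}',
    'data: [DONE]',
]

result = ''
for chunk in fake_stream:
    if not chunk.startswith('data:'):
        continue                  # a real client would treat this as an error payload
    if 'data: [DONE]' in chunk:
        break                     # end of stream
    choice = json.loads(chunk.lstrip('data:'))['choices'][0]
    delta = choice['delta']
    if 'role' in delta:
        continue                  # the first event only announces the role
    if 'content' in delta:
        result += delta['content']
print(result)                     # Hello, world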
@@ -228,6 +246,7 @@ def predict(inputs, llm_kwargs, plugin_kwargs, chatbot, history=[], system_promp
         yield from update_ui(chatbot=chatbot, history=history, msg="Json异常" + error_msg)  # 刷新界面
         return
 def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     """
     整合所有信息选择LLM模型生成http请求为发送请求做准备
@@ -247,23 +266,19 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     messages = [{"role": "system", "content": system_prompt}]
     if conversation_cnt:
         for index in range(0, 2*conversation_cnt, 2):
-            what_i_have_asked = {}
-            what_i_have_asked["role"] = "user"
-            what_i_have_asked["content"] = history[index]
-            what_gpt_answer = {}
-            what_gpt_answer["role"] = "assistant"
-            what_gpt_answer["content"] = history[index+1]
+            what_i_have_asked = {"role": "user", "content": history[index]}
+            what_gpt_answer = {"role": "assistant", "content": history[index + 1]}
             if what_i_have_asked["content"] != "":
-                if what_gpt_answer["content"] == "": continue
-                if what_gpt_answer["content"] == timeout_bot_msg: continue
+                if what_gpt_answer["content"] == "":
+                    continue
+                if what_gpt_answer["content"] == timeout_bot_msg:
+                    continue
                 messages.append(what_i_have_asked)
                 messages.append(what_gpt_answer)
             else:
                 messages[-1]['content'] = what_gpt_answer['content']
-    what_i_ask_now = {}
-    what_i_ask_now["role"] = "user"
-    what_i_ask_now["content"] = inputs
+    what_i_ask_now = {"role": "user", "content": inputs}
     messages.append(what_i_ask_now)
     payload = {
@@ -278,8 +293,8 @@ def generate_payload(inputs, llm_kwargs, history, system_prompt, stream):
     }
     try:
         print(f" {llm_kwargs['llm_model']} : {conversation_cnt} : {inputs[:100]} ..........")
-    except:
-        print('输入中可能存在乱码。')
-    return headers,payload
+    except Exception as e:
+        print(f'输入中可能存在乱码。抛出异常: {e}')
+    return headers, payload
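generate_payload turns the flat history list (alternating user/assistant strings) plus the new input into an OpenAI-style messages array. A compact sketch of that transformation with made-up strings; it omits the merge branch of the real function:

# Illustration: flat [user, assistant, user, assistant, ...] history -> chat messages.
def build_messages(system_prompt, history, inputs):
    messages = [{"role": "system", "content": system_prompt}]
    for i in range(0, len(history) - 1, 2):
        user, assistant = history[i], history[i + 1]
        if user == "" or assistant == "":
            continue                      # skip incomplete or timed-out turns
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": inputs})
    return messages

print(build_messages("You are helpful.", ["hi", "hello!"], "summarize this paper"))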

View File

@@ -1,4 +1,4 @@
-./docs/gradio-3.32.2-py3-none-any.whl
+gradio>=3.33.1
 tiktoken>=0.3.3
 requests[socks]
 transformers
@@ -16,3 +16,5 @@ openai
 numpy
 arxiv
 rich
+langchain
+zh_langchain

View File

@@ -21,6 +21,7 @@ pj = os.path.join
 ========================================================================
 """
 class ChatBotWithCookies(list):
     def __init__(self, cookie):
         self._cookies = cookie
@@ -71,11 +72,13 @@ def update_ui(chatbot, history, msg='正常', **kwargs):  # 刷新界面
     assert isinstance(chatbot, ChatBotWithCookies), "在传递chatbot的过程中不要将其丢弃。必要时可用clear将其清空然后用for+append循环重新赋值。"
     yield chatbot.get_cookies(), chatbot, history, msg
 def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1):  # 刷新界面
     """
     刷新用户界面
     """
-    if len(chatbot) == 0: chatbot.append(["update_ui_last_msg", lastmsg])
+    if len(chatbot) == 0:
+        chatbot.append(["update_ui_last_msg", lastmsg])
     chatbot[-1] = list(chatbot[-1])
     chatbot[-1][-1] = lastmsg
     yield from update_ui(chatbot=chatbot, history=history)
@@ -83,24 +86,25 @@ def update_ui_lastest_msg(lastmsg, chatbot, history, delay=1):  # 刷新界面
 def trimmed_format_exc():
-    import os, traceback
-    str = traceback.format_exc()
+    import os
+    import traceback
+    _str = traceback.format_exc()
     current_path = os.getcwd()
     replace_path = "."
-    return str.replace(current_path, replace_path)
+    return _str.replace(current_path, replace_path)
 def CatchException(f):
     """
     装饰器函数捕捉函数f中的异常并封装到一个生成器中返回并显示到聊天当中。
     """
     @wraps(f)
     def decorated(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT=-1):
         try:
             yield from f(txt, top_p, temperature, chatbot, history, systemPromptTxt, WEB_PORT)
         except Exception as e:
             from check_proxy import check_proxy
-            from toolbox import get_conf
+            # from toolbox import get_conf  # 不需要导入本文件内容
             proxies, = get_conf('proxies')
             tb_str = '```\n' + trimmed_format_exc() + '```'
             if len(chatbot) == 0:
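CatchException wraps a generator-based plugin so that any exception becomes a chat message instead of killing the UI loop. A stripped-down sketch of that decorator pattern, with generic names rather than the repo's exact signature:

# Generic sketch: a decorator that turns exceptions from a generator into yielded messages.
import traceback
from functools import wraps

def catch_exception(f):
    @wraps(f)
    def decorated(*args, **kwargs):
        try:
            yield from f(*args, **kwargs)
        except Exception:
            yield f"[Local Message] plugin failed:\n{traceback.format_exc()}"
    return decorated

@catch_exception
def flaky_plugin():
    yield "step 1 ok"
    raise ValueError("boom")

print(list(flaky_plugin()))  # ['step 1 ok', '[Local Message] plugin failed:\n...']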
@@ -148,6 +152,7 @@ def HotReload(f):
 ========================================================================
 """
 def get_reduce_token_percent(text):
     """
     * 此函数未来将被弃用
@@ -207,8 +212,6 @@ def regular_txt_to_markdown(text):
     return text
 def report_execption(chatbot, history, a, b):
     """
     向chatbot中添加错误信息
@@ -238,6 +241,7 @@ def text_divide_paragraph(text):
     text = "</br>".join(lines)
     return pre + text + suf
 @lru_cache(maxsize=128)  # 使用 lru缓存 加快转换速度
 def markdown_convertion(txt):
     """
@@ -440,6 +444,7 @@ def find_recent_files(directory):
     return recent_files
 def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     # 将文件复制一份到下载区
     import shutil
@@ -452,6 +457,7 @@ def promote_file_to_downloadzone(file, rename_file=None, chatbot=None):
     else: current = []
     chatbot._cookies.update({'file_to_promote': [new_path] + current})
 def on_file_uploaded(files, chatbot, txt, txt2, checkboxes):
     """
     当文件被上传时的回调函数
@@ -505,17 +511,20 @@ def on_report_generated(cookies, files, chatbot):
     chatbot.append(['报告如何远程获取?', f'报告已经添加到右侧“文件上传区”(可能处于折叠状态),请查收。{file_links}'])
     return cookies, report_files, chatbot
 def is_openai_api_key(key):
     API_MATCH_ORIGINAL = re.match(r"sk-[a-zA-Z0-9]{48}$", key)
     API_MATCH_AZURE = re.match(r"[a-zA-Z0-9]{32}$", key)
     return bool(API_MATCH_ORIGINAL) or bool(API_MATCH_AZURE)
 def is_api2d_key(key):
     if key.startswith('fk') and len(key) == 41:
         return True
     else:
         return False
 def is_any_api_key(key):
     if ',' in key:
         keys = key.split(',')
@@ -525,6 +534,7 @@ def is_any_api_key(key):
     else:
         return is_openai_api_key(key) or is_api2d_key(key)
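is_any_api_key accepts either a single key or a comma-separated list and checks each candidate against the patterns above. A small usage sketch with the helpers above in scope and obviously fake strings shaped like those regexes (not real keys):

# Illustration with dummy strings; do not mistake these for real credentials.
fake_openai = "sk-" + "a" * 48        # matches r"sk-[a-zA-Z0-9]{48}$"
fake_azure  = "0" * 32                # matches r"[a-zA-Z0-9]{32}$"
fake_api2d  = "fk" + "b" * 39         # startswith('fk') and len == 41

print(is_openai_api_key(fake_openai))                 # True
print(is_api2d_key(fake_api2d))                       # True
print(is_any_api_key(f"{fake_openai},{fake_api2d}"))  # True: at least one part validates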
 def what_keys(keys):
     avail_key_list = {'OpenAI Key':0, "API2D Key":0}
     key_list = keys.split(',')
@@ -539,6 +549,7 @@ def what_keys(keys):
     return f"检测到: OpenAI Key {avail_key_list['OpenAI Key']}API2D Key {avail_key_list['API2D Key']}"
 def select_api_key(keys, llm_model):
     import random
     avail_key_list = []
@@ -558,6 +569,7 @@ def select_api_key(keys, llm_model):
     api_key = random.choice(avail_key_list)  # 随机负载均衡
     return api_key
 def read_env_variable(arg, default_value):
     """
     环境变量可以是 `GPT_ACADEMIC_CONFIG`(优先),也可以直接是`CONFIG`
@@ -612,6 +624,7 @@ def read_env_variable(arg, default_value):
     print亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
     return r
 @lru_cache(maxsize=128)
 def read_single_conf_with_lru_cache(arg):
     from colorful import print亮红, print亮绿, print亮蓝
@@ -676,6 +689,7 @@ class DummyWith():
     def __exit__(self, exc_type, exc_value, traceback):
         return
 def run_gradio_in_subpath(demo, auth, port, custom_path):
     """
     把gradio的运行地址更改到指定的二次路径上
@@ -770,6 +784,7 @@ def clip_history(inputs, history, tokenizer, max_token_limit):
 ========================================================================
 """
 def zip_folder(source_folder, dest_folder, zip_name):
     import zipfile
     import os
@@ -801,6 +816,7 @@ def zip_folder(source_folder, dest_folder, zip_name):
     print(f"Zip file created at {zip_file}")
 def zip_result(folder):
     import time
     t = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
@@ -811,6 +827,7 @@ def gen_time_str():
     import time
     return time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
 class ProxyNetworkActivate():
     """
     这段代码定义了一个名为TempProxy的空上下文管理器, 用于给一小段代码上代理
@@ -830,12 +847,14 @@ class ProxyNetworkActivate():
         if 'HTTPS_PROXY' in os.environ: os.environ.pop('HTTPS_PROXY')
         return
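ProxyNetworkActivate is a context manager that exports the configured proxy into the process environment on entry and removes it on exit, so only the wrapped block goes through the proxy. A generic sketch of that pattern (the proxy address is a placeholder):

# Hypothetical sketch of a scoped-proxy context manager, similar in spirit to ProxyNetworkActivate.
import os

class ScopedProxy:
    def __init__(self, proxy_url):
        self.proxy_url = proxy_url

    def __enter__(self):
        os.environ['HTTP_PROXY'] = self.proxy_url
        os.environ['HTTPS_PROXY'] = self.proxy_url
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        os.environ.pop('HTTP_PROXY', None)   # remove even if the block raised
        os.environ.pop('HTTPS_PROXY', None)
        return

with ScopedProxy("http://localhost:7890"):   # placeholder address
    pass  # network calls made here would pick up the proxy from the environment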
 def objdump(obj, file='objdump.tmp'):
     import pickle
     with open(file, 'wb+') as f:
         pickle.dump(obj, f)
     return
 def objload(file='objdump.tmp'):
     import pickle, os
     if not os.path.exists(file):