提交一部分代码,优化结构

This commit is contained in:
w_xiaolizu
2023-06-18 18:45:12 +08:00
parent 56679a41ef
commit 2333b4e8ef

View File

@@ -1,21 +1,16 @@
import markdown import markdown
import importlib import importlib
import time
import inspect import inspect
import re
import gradio as gr import gradio as gr
import func_box import func_box
import os
from latex2mathml.converter import convert as tex2mathml from latex2mathml.converter import convert as tex2mathml
from functools import wraps, lru_cache from functools import wraps, lru_cache
import logging
import shutil import shutil
import os import os
import time import time
import glob import glob
import sys import sys
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
import html
############################### 插件输入输出接驳区 ####################################### ############################### 插件输入输出接驳区 #######################################
""" """
@@ -297,7 +292,7 @@ def text_divide_paragraph(input_str):
return input_str return input_str
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度 @lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt): def markdown_convertion(txt):
""" """
将Markdown格式的文本转换为HTML格式。如果包含数学公式则先将公式转换为HTML格式。 将Markdown格式的文本转换为HTML格式。如果包含数学公式则先将公式转换为HTML格式。
@@ -306,7 +301,7 @@ def markdown_convertion(txt):
suf = '</div>' suf = '</div>'
if txt.startswith(pre) and txt.endswith(suf): if txt.startswith(pre) and txt.endswith(suf):
# print('警告,输入了已经经过转化的字符串,二次转化可能出问题') # print('警告,输入了已经经过转化的字符串,二次转化可能出问题')
return txt # 已经被转化过,不需要再次转化 return txt # 已经被转化过,不需要再次转化
markdown_extension_configs = { markdown_extension_configs = {
'mdx_math': { 'mdx_math': {
@@ -314,6 +309,7 @@ def markdown_convertion(txt):
'use_gitlab_delimiters': False, 'use_gitlab_delimiters': False,
}, },
} }
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
def tex2mathml_catch_exception(content, *args, **kwargs): def tex2mathml_catch_exception(content, *args, **kwargs):
try: try:
@@ -346,7 +342,8 @@ def markdown_convertion(txt):
""" """
解决一个mdx_math的bug单$包裹begin命令时多余<script> 解决一个mdx_math的bug单$包裹begin命令时多余<script>
""" """
content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">', '<script type="math/tex; mode=display">') content = content.replace('<script type="math/tex">\n<script type="math/tex; mode=display">',
'<script type="math/tex; mode=display">')
content = content.replace('</script>\n</script>', '</script>') content = content.replace('</script>\n</script>', '</script>')
return content return content
@@ -354,14 +351,16 @@ def markdown_convertion(txt):
if '```' not in txt: if '```' not in txt:
return True return True
else: else:
if '```reference' in txt: return True # newbing if '```reference' in txt:
else: return False return True # newbing
else:
return False
if ('$$' in txt) and no_code(txt): # 有$标识的公式符号,且没有代码段```的标识 if ('$' in txt) and no_code(txt): # 有$标识的公式符号,且没有代码段```的标识
# convert everything to html format # convert everything to html format
split = markdown.markdown(text='---') split = markdown.markdown(text='---')
find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>' convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'],
convert_stage_1 = markdown.markdown(text=txt, extensions=['mdx_math', 'fenced_code', 'tables', 'sane_lists'], extension_configs=markdown_extension_configs) extension_configs=markdown_extension_configs)
convert_stage_1 = markdown_bug_hunt(convert_stage_1) convert_stage_1 = markdown_bug_hunt(convert_stage_1)
# re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s). # re.DOTALL: Make the '.' special character match any character at all, including a newline; without this flag, '.' will match anything except a newline. Corresponds to the inline flag (?s).
# 1. convert to easy-to-copy tex (do not render math) # 1. convert to easy-to-copy tex (do not render math)
@@ -371,8 +370,7 @@ def markdown_convertion(txt):
# cat them together # cat them together
return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf return pre + convert_stage_2_1 + f'{split}' + convert_stage_2_2 + suf
else: else:
context = markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) return pre + markdown.markdown(txt, extensions=['fenced_code', 'codehilite', 'tables', 'sane_lists']) + suf
return pre + context + suf
def close_up_code_segment_during_stream(gpt_reply): def close_up_code_segment_during_stream(gpt_reply):