|
from toolbox import update_ui |
|
from toolbox import CatchException, report_execption, write_results_to_file, predict_no_ui_but_counting_down |
|
# Debug switch. Nothing in the code visible in this module reads it.
fast_debug = False
|
|
|
class PaperFileGroup():
    """
    Container for a group of LaTeX files and the fragments they are split into.

    Attributes:
        file_paths: paths of the source files, in the order they were added.
        file_contents: text of each file, parallel to ``file_paths``.
        sp_file_contents: fragments produced by ``run_file_split``.
        sp_file_index: for each fragment, the index of its source file in
            ``file_contents``.
        sp_file_tag: for each fragment, a label: the file path itself, or the
            path plus ``.part-<j>.tex`` when the file had to be broken up.
        get_token_num: callable returning the number of tokens in a string.
    """

    def __init__(self):
        self.file_paths = []
        self.file_contents = []
        self.sp_file_contents = []
        self.sp_file_index = []
        self.sp_file_tag = []

        # Token counter built on the tiktoken encoding of the configured model.
        # Imported here rather than at module level, as in the original code.
        import tiktoken
        from toolbox import get_conf
        enc = tiktoken.encoding_for_model(*get_conf('LLM_MODEL'))

        def get_token_num(txt):
            return len(enc.encode(txt))

        self.get_token_num = get_token_num

    def run_file_split(self, max_token_limit=1900):
        """
        Split long texts into fragments that stay under the token limit.

        A file whose token count is below ``max_token_limit`` is kept whole;
        any other file is handed to
        ``breakdown_txt_to_satisfy_token_limit_for_pdf``. The ``sp_*`` lists are
        rebuilt from scratch on every call, so calling this method more than
        once does not accumulate duplicate fragments.

        Args:
            max_token_limit: token budget used to decide whether to split, and
                passed on to the breakdown helper.
        """
        sp_contents, sp_index, sp_tag = [], [], []

        for index, file_content in enumerate(self.file_contents):
            file_path = self.file_paths[index]
            if self.get_token_num(file_content) < max_token_limit:
                # Small enough: the whole file is a single fragment.
                sp_contents.append(file_content)
                sp_index.append(index)
                sp_tag.append(file_path)
            else:
                from .crazy_utils import breakdown_txt_to_satisfy_token_limit_for_pdf
                segments = breakdown_txt_to_satisfy_token_limit_for_pdf(file_content, self.get_token_num, max_token_limit)
                for j, segment in enumerate(segments):
                    sp_contents.append(segment)
                    sp_index.append(index)
                    sp_tag.append(file_path + f".part-{j}.tex")

        # Publish the results only after every file was processed.
        self.sp_file_contents = sp_contents
        self.sp_file_index = sp_index
        self.sp_file_tag = sp_tag

        print('Segmentation: done')
|
|
|
def _strip_latex_comments(tex_source):
    """Remove LaTeX ``%`` comments from *tex_source*, keeping escaped percent signs."""
    import re
    # The negative lookbehind keeps a backslash-escaped percent sign (a literal
    # percent in LaTeX) from being treated as the start of a comment.
    return re.sub(r'(?<!\\)%.*', '', tex_source)


def 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en'):
    """
    Polish every file in ``file_manifest`` with the LLM and report the result.

    Generator: it delegates (``yield from``) to the multi-threaded request
    helper and to ``update_ui``.

    Steps: read each file as UTF-8, strip LaTeX comments, split the texts into
    fragments with a 1024-token limit, build one prompt per fragment, send all
    prompts through
    ``request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency``,
    write the responses to a timestamped report via ``write_results_to_file``,
    and append a final message to ``chatbot``.

    Args:
        file_manifest: paths of the ``.tex`` files to polish.
        project_folder: project root; only used as the label of the final chat
            message when ``file_manifest`` is empty.
        llm_kwargs: forwarded unchanged to the request helper.
        plugin_kwargs: accepted for interface compatibility; not used here.
        chatbot: chat log; forwarded to the request helper and appended to.
        history: replaced by the response collection before the final UI refresh.
        system_prompt: accepted for interface compatibility; not used here.
        language: ``'en'`` or ``'zh'``; selects the prompt wording.

    Raises:
        ValueError: if ``language`` is neither ``'en'`` nor ``'zh'``.
    """
    # Fail fast: without this check an unknown language only surfaced later as
    # an unbound ``inputs_array``.
    if language not in ('en', 'zh'):
        raise ValueError(f"Unsupported language: {language!r} (expected 'en' or 'zh')")

    import time
    from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency

    # Step 1: read the files and drop their comments.
    pfg = PaperFileGroup()
    fp = project_folder  # fallback label if the manifest turns out to be empty
    for fp in file_manifest:
        with open(fp, 'r', encoding='utf-8') as f:
            file_content = f.read()
        pfg.file_paths.append(fp)
        pfg.file_contents.append(_strip_latex_comments(file_content))

    # Step 2: split the texts into fragments.
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)

    # Step 3: one prompt, one display label and one system prompt per fragment.
    if language == 'en':
        # Backslashes are escaped explicitly; the resulting text is unchanged.
        prompt_head = ("Below is a section from an academic paper, polish this section to meet the academic standard, "
                       "improve the grammar, clarity and overall readability, "
                       "do not modify any latex command such as \\section, \\cite and equations:")
        inputs_array = [f"{prompt_head}\n\n{frag}" for frag in pfg.sp_file_contents]
        inputs_show_user_array = [f"Polish {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["You are a professional academic paper writer." for _ in range(n_split)]
    else:  # language == 'zh'
        inputs_array = ["这里有一个使用Latex格式的学术论文,请把写作风格要求的学术风格进行润色,改进拼写、语法、清晰度、简洁度和整体可读性。" +
                        f"论文现在开始:\n{frag}" for frag in pfg.sp_file_contents]
        inputs_show_user_array = [f"润色 {f}" for f in pfg.sp_file_tag]
        sys_prompt_array = ["你是一位专业的学术论文作家。润色以下论文。输出中保留Latex格式。" for _ in range(n_split)]

    # Step 4: send all fragments through the multi-threaded request helper.
    gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
        inputs_array=inputs_array,
        inputs_show_user_array=inputs_show_user_array,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history_array=[[""] for _ in range(n_split)],
        sys_prompt_array=sys_prompt_array,
        max_workers=10,
        scroller_max_len=80
    )

    # Step 5: write the report and refresh the UI.
    create_report_file_name = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + "-chatgpt.polish.md"
    res = write_results_to_file(gpt_response_collection, file_name=create_report_file_name)
    history = gpt_response_collection
    # ``fp`` is the last file of the manifest (or the project folder if empty).
    chatbot.append((f"{fp}完成了吗?", res))
    yield from update_ui(chatbot=chatbot, history=history)
|
|
|
|
|
@CatchException
def Latex英文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: polish all ``.tex`` files under ``txt`` with English prompts.

    Generator that delegates to ``update_ui`` and ``多文件润色``. It reports an
    error through ``report_execption`` and returns early when tiktoken cannot be
    imported, when ``txt`` does not exist, or when no ``.tex`` file is found.

    Args:
        txt: path of the LaTeX project, taken from the input box.
        llm_kwargs: forwarded to ``多文件润色``.
        plugin_kwargs: forwarded to ``多文件润色``.
        chatbot: chat log; the plugin description and any errors are appended.
        history: used for the first UI refreshes, then reset to an empty list.
        system_prompt: forwarded to ``多文件润色``.
        web_port: not used by this function.
    """
    # Describe the plugin in the chat window before doing any work.
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)

    # Check the extra dependency up front so the user gets an install hint.
    try:
        import tiktoken  # noqa: F401 -- imported only to verify availability
    except ImportError:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)
        return

    # The polishing run starts from an empty history.
    history = []
    import glob, os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)
        return
    project_folder = txt

    # glob.escape keeps special characters in the folder name ('[', '*', '?')
    # from being interpreted as glob wildcards.
    file_manifest = glob.glob(f'{glob.escape(project_folder)}/**/*.tex', recursive=True)
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)
        return

    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='en')
|
|
|
|
|
|
|
|
|
|
|
|
|
@CatchException
def Latex中文润色(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Plugin entry point: polish all ``.tex`` files under ``txt`` with Chinese prompts.

    Generator that delegates to ``update_ui`` and ``多文件润色``. It reports an
    error through ``report_execption`` and returns early when tiktoken cannot be
    imported, when ``txt`` does not exist, or when no ``.tex`` file is found.

    Args:
        txt: path of the LaTeX project, taken from the input box.
        llm_kwargs: forwarded to ``多文件润色``.
        plugin_kwargs: forwarded to ``多文件润色``.
        chatbot: chat log; the plugin description and any errors are appended.
        history: used for the first UI refreshes, then reset to an empty list.
        system_prompt: forwarded to ``多文件润色``.
        web_port: not used by this function.
    """
    # Describe the plugin in the chat window before doing any work.
    chatbot.append([
        "函数插件功能?",
        "对整个Latex项目进行润色。函数插件贡献者: Binary-Husky"])
    yield from update_ui(chatbot=chatbot, history=history)

    # Check the extra dependency up front so the user gets an install hint.
    try:
        import tiktoken  # noqa: F401 -- imported only to verify availability
    except ImportError:
        report_execption(chatbot, history,
                         a=f"解析项目: {txt}",
                         b="导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade tiktoken```。")
        yield from update_ui(chatbot=chatbot, history=history)
        return

    # The polishing run starts from an empty history.
    history = []
    import glob, os
    if not os.path.exists(txt):
        if txt == "": txt = '空空如也的输入栏'
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)
        return
    project_folder = txt

    # glob.escape keeps special characters in the folder name ('[', '*', '?')
    # from being interpreted as glob wildcards.
    file_manifest = glob.glob(f'{glob.escape(project_folder)}/**/*.tex', recursive=True)
    if not file_manifest:
        report_execption(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何.tex文件: {txt}")
        yield from update_ui(chatbot=chatbot, history=history)
        return

    yield from 多文件润色(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, language='zh')