#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @author: Kun

import re

from global_config import lang_opt, llm_model_opt
if "openai" == llm_model_opt:
from utils.openai_util import get_api_response
elif "vicuna" == llm_model_opt:
from utils.vicuna_util import get_api_response
elif "chatglm" == llm_model_opt:
from utils.chatglm_util import get_api_response
elif "baichuan" == llm_model_opt:
from utils.baichuan_util import get_api_response
elif "aquila" == llm_model_opt:
from utils.aquila_util import get_api_response
elif "falcon" == llm_model_opt:
from utils.falcon_util import get_api_response
else:
raise Exception("not supported llm model name: {}".format(llm_model_opt))
def get_content_between_a_b(a, b, text):
    """Extract the text between marker ``a`` and marker ``b`` in ``text``."""
    if "en" == lang_opt:
        if "vicuna" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        elif "openai" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL)
        elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        else:
            raise ValueError(f"unsupported LLM model name: {llm_model_opt}")
        if match is None:
            raise ValueError(f"no content found between {a!r} and {b!r}")
        return match.group(1).strip()
    elif lang_opt in ["zh1", "zh2"]:
        if "vicuna" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        elif "openai" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL)
        elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        else:
            raise ValueError(f"unsupported LLM model name: {llm_model_opt}")
        if match:
            return match.group(1).strip()
        # The model sometimes drops the space in numbered markers (e.g.
        # "段落 1:" comes back as "段落1:"), so strip spaces from the markers
        # and retry once.
        if "1" in a or "2" in a or "3" in a:
            a = "".join(a.split(" "))
        if "1" in b or "2" in b or "3" in b:
            b = "".join(b.split(" "))
        if "vicuna" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        elif "openai" == llm_model_opt:
            match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL)
        elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]:
            match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL)
        else:
            raise ValueError(f"unsupported LLM model name: {llm_model_opt}")
        if match:
            return match.group(1).strip()
        # Still no match: return a default error message.
        # ("翻译时出现错误请重试" means "an error occurred, please retry".)
        return "翻译时出现错误请重试"
    else:
        raise ValueError(f"unsupported language: {lang_opt}")
def get_init(init_text=None, text=None, response_file=None, model=None, tokenizer=None):
    """
    init_text: if the title, outline, and the first 3 paragraphs are given in a .txt file, read them directly
    text: if no .txt file is given, use the init prompt to generate them
    """
    if not init_text:
        response = get_api_response(model, tokenizer, text)
        print("response: {}".format(response))
        if response_file:
            with open(response_file, 'a', encoding='utf-8') as f:
                f.write(f"Init output here:\n{response}\n\n")
    else:
        with open(init_text, 'r', encoding='utf-8') as f:
            response = f.read()

    paragraphs = {
        "name": "",
        "Outline": "",
        "Paragraph 1": "",
        "Paragraph 2": "",
        "Paragraph 3": "",
        "Summary": "",
        "Instruction 1": "",
        "Instruction 2": "",
        "Instruction 3": ""
    }

    if "en" == lang_opt:
        paragraphs['name'] = get_content_between_a_b('Name:', 'Outline', response)
        paragraphs['Paragraph 1'] = get_content_between_a_b('Paragraph 1:', 'Paragraph 2:', response)
        paragraphs['Paragraph 2'] = get_content_between_a_b('Paragraph 2:', 'Paragraph 3:', response)
        paragraphs['Paragraph 3'] = get_content_between_a_b('Paragraph 3:', 'Summary', response)
        paragraphs['Summary'] = get_content_between_a_b('Summary:', 'Instruction 1', response)
        paragraphs['Instruction 1'] = get_content_between_a_b('Instruction 1:', 'Instruction 2', response)
        paragraphs['Instruction 2'] = get_content_between_a_b('Instruction 2:', 'Instruction 3', response)
        lines = response.splitlines()
        # The content of Instruction 3 may be on the same line as its marker
        # or on the next line.
        if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'):
            paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):]
        elif lines[-1] != '\n':
            paragraphs['Instruction 3'] = lines[-1]
        # The response sometimes includes a chapter-level outline and sometimes
        # does not, so the end marker for the outline varies.
        for line in lines:
            if line.startswith('Chapter'):
                paragraphs['Outline'] = get_content_between_a_b('Outline:', 'Chapter', response)
                break
        if paragraphs['Outline'] == '':
            paragraphs['Outline'] = get_content_between_a_b('Outline:', 'Paragraph', response)
    elif lang_opt in ["zh1", "zh2"]:
        paragraphs['name'] = get_content_between_a_b('名称:', '概述:', response)
        paragraphs['Paragraph 1'] = get_content_between_a_b('段落 1:', '段落 2:', response)
        paragraphs['Paragraph 2'] = get_content_between_a_b('段落 2:', '段落 3:', response)
        paragraphs['Paragraph 3'] = get_content_between_a_b('段落 3:', '总结:', response)
        paragraphs['Summary'] = get_content_between_a_b('总结:', '指令 1', response)
        paragraphs['Instruction 1'] = get_content_between_a_b('指令 1:', '指令 2:', response)
        paragraphs['Instruction 2'] = get_content_between_a_b('指令 2:', '指令 3:', response)
        lines = response.splitlines()
        # The content of Instruction 3 may be on the same line as its marker
        # or on the next line.
        if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'):
            paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):]
        elif lines[-1] != '\n':
            paragraphs['Instruction 3'] = lines[-1]
        # The response sometimes includes a chapter-level outline and sometimes
        # does not, so the end marker for the outline varies.
        for line in lines:
            if line.startswith('Chapter'):
                paragraphs['Outline'] = get_content_between_a_b('概述:', 'Chapter', response)
                break
        if paragraphs['Outline'] == '':
            paragraphs['Outline'] = get_content_between_a_b('概述:', '段落', response)

    return paragraphs
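
# Usage sketch for get_init (illustrative only; `init_prompt`, `model`,
# `tokenizer`, and the file path are hypothetical placeholders):
#
#     paragraphs = get_init(text=init_prompt, model=model, tokenizer=tokenizer)
#     print(paragraphs["Instruction 1"])
#
#     # Or resume from a previously saved init response:
#     paragraphs = get_init(init_text="init_output.txt")
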
def get_chatgpt_response(model, prompt):
    response = ""
    # ask() streams incremental results; keep only the final message.
    for data in model.ask(prompt):
        response = data["message"]
    model.delete_conversation(model.conversation_id)
    model.reset_chat()
    return response
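
# Note: the calls above appear to target a revChatGPT-style Chatbot object
# (ask() yielding dicts with a "message" field, plus delete_conversation()
# and reset_chat()). This is an inference from the call pattern, not a
# documented contract of this repository.
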
def parse_instructions(instructions):
    # Render a list of instructions as a numbered, newline-separated block.
    output = ""
    for i, instruction in enumerate(instructions, start=1):
        output += f"{i}. {instruction}\n"
    return output
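
# Usage sketch for parse_instructions (illustrative values):
#
#     parse_instructions(["Write the next paragraph.", "Introduce a twist."])
#     # -> "1. Write the next paragraph.\n2. Introduce a twist.\n"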