Spaces:
Runtime error
Runtime error
#!python
# -*- coding: utf-8 -*-
# @author: Kun
import re | |
from global_config import lang_opt, llm_model_opt | |
# Bind ``get_api_response`` to the implementation that matches the configured
# LLM backend. Exactly one backend module is imported; an unknown backend name
# aborts the import of this module.
if llm_model_opt == "openai":
    from utils.openai_util import get_api_response
elif llm_model_opt == "vicuna":
    from utils.vicuna_util import get_api_response
elif llm_model_opt == "chatglm":
    from utils.chatglm_util import get_api_response
elif llm_model_opt == "baichuan":
    from utils.baichuan_util import get_api_response
elif llm_model_opt == "aquila":
    from utils.aquila_util import get_api_response
elif llm_model_opt == "falcon":
    from utils.falcon_util import get_api_response
else:
    raise Exception(f"not supported llm model name: {llm_model_opt}")
def _marker_pattern(a, b):
    """Build the regex that captures the text following ``a`` and preceding ``b``.

    ``a`` and ``b`` are interpolated into the pattern verbatim (they are not
    escaped). The shape of the pattern depends on the configured model:

    * "openai": ``b`` must start directly after a newline; group 1 is
      everything between ``a`` and that newline.
    * every other supported model: arbitrary text may sit between the newline
      and ``b``; group 1 ends at the first newline that is eventually followed
      by ``b``.

    Raises:
        Exception: if ``llm_model_opt`` is not a supported model name.
    """
    if "openai" == llm_model_opt:
        return f"{a}(.*?)\n{b}"
    if llm_model_opt in ("vicuna", "chatglm", "baichuan", "aquila", "falcon"):
        return f"{a}(.*?)\n(.*?){b}"
    raise Exception("not supported llm model name: {}".format(llm_model_opt))


def get_content_between_a_b(a, b, text):
    """Extract the stripped text found between marker ``a`` and marker ``b``.

    Args:
        a: start marker, used as a regex fragment.
        b: end marker, used as a regex fragment.
        text: the text to search (searched with ``re.DOTALL``).

    Returns:
        The first captured group with surrounding whitespace removed. For the
        Chinese language options, a fixed error message string is returned
        when no match can be found, even after retrying with spaces removed
        from markers that contain the digits 1, 2 or 3.

    Raises:
        AttributeError: for the "en" language option when the markers are not
            found. The type is what the former ``None.group`` failure raised,
            kept so existing handlers continue to work; only the message is
            new.
        Exception: if the configured model or language is not supported.
    """
    if "en" == lang_opt:
        match = re.search(_marker_pattern(a, b), text, re.DOTALL)
        if match is None:
            raise AttributeError(
                f"no content found between {a!r} and {b!r}")
        return match.group(1).strip()

    if lang_opt in ("zh1", "zh2"):
        match = re.search(_marker_pattern(a, b), text, re.DOTALL)
        if match is None:
            # Retry with numbered markers written without spaces
            # (e.g. "段落 1:" -> "段落1:").
            if any(digit in a for digit in "123"):
                a = "".join(a.split(" "))
            if any(digit in b for digit in "123"):
                b = "".join(b.split(" "))
            match = re.search(_marker_pattern(a, b), text, re.DOTALL)
        if match is None:
            # Nothing matched: fall back to a user-facing error message
            # instead of raising.
            return "翻译时出现错误请重试"
        return match.group(1).strip()

    raise Exception(f"not supported language: {lang_opt}")
def _init_markers():
    """Return the section-marker table for ``lang_opt``, or None if unsupported.

    ``sections`` lists (result key, start marker, end marker) in the order the
    sections are extracted. ``last_labels`` are the label prefixes (without the
    trailing colon) that may precede the third instruction on the last line.
    """
    if "en" == lang_opt:
        return {
            "sections": (
                ("name", "Name:", "Outline"),
                ("Paragraph 1", "Paragraph 1:", "Paragraph 2:"),
                ("Paragraph 2", "Paragraph 2:", "Paragraph 3:"),
                ("Paragraph 3", "Paragraph 3:", "Summary"),
                ("Summary", "Summary:", "Instruction 1"),
                ("Instruction 1", "Instruction 1:", "Instruction 2"),
                ("Instruction 2", "Instruction 2:", "Instruction 3"),
            ),
            "outline_start": "Outline:",
            "outline_fallback_end": "Paragraph",
            "last_labels": ("Instruction 3",),
        }
    if lang_opt in ("zh1", "zh2"):
        return {
            "sections": (
                ("name", "名称:", "概述:"),
                ("Paragraph 1", "段落 1:", "段落 2:"),
                ("Paragraph 2", "段落 2:", "段落 3:"),
                ("Paragraph 3", "段落 3:", "总结:"),
                ("Summary", "总结:", "指令 1"),
                ("Instruction 1", "指令 1:", "指令 2:"),
                ("Instruction 2", "指令 2:", "指令 3:"),
            ),
            "outline_start": "概述:",
            "outline_fallback_end": "段落",
            # The Chinese label may appear with or without the space; the
            # English label is still accepted, as it was before.
            "last_labels": ("指令 3", "指令3", "Instruction 3"),
        }
    return None


def _last_line_content(lines, labels):
    """Return the last non-blank line with a leading instruction label removed.

    Returns an empty string when ``lines`` holds no non-blank line.
    """
    tail = next((line for line in reversed(lines) if line.strip()), "")
    for label in labels:
        if tail.startswith(label):
            # +1 also drops the colon that follows the label.
            return tail[len(label) + 1:]
    return tail


def get_init(init_text=None, text=None, response_file=None, model=None, tokenizer=None):
    """Obtain the initial story material and split it into named sections.

    Args:
        init_text: path of a text file that already holds the name, outline,
            first three paragraphs, summary and instructions. When given, the
            file is read and no model call is made.
        text: prompt passed to ``get_api_response`` when ``init_text`` is not
            given.
        response_file: optional path; the generated response is appended to
            it (only when the response was generated, not read from a file).
        model: forwarded to ``get_api_response``.
        tokenizer: forwarded to ``get_api_response``.

    Returns:
        A dict with the keys "name", "Outline", "Paragraph 1".."Paragraph 3",
        "Summary" and "Instruction 1".."Instruction 3". All values stay empty
        strings when ``lang_opt`` is neither "en" nor "zh1"/"zh2".

    Instruction 3 is taken from the last non-blank line of the response, so
    trailing blank lines no longer blank it out and an empty response no
    longer raises ``IndexError``.
    """
    if not init_text:
        response = get_api_response(model, tokenizer, text)
        print("response: {}".format(response))
        if response_file:
            with open(response_file, 'a', encoding='utf-8') as f:
                f.write(f"Init output here:\n{response}\n\n")
    else:
        # The context manager closes the file; no explicit close is needed.
        with open(init_text, 'r', encoding='utf-8') as f:
            response = f.read()

    paragraphs = {
        "name": "",
        "Outline": "",
        "Paragraph 1": "",
        "Paragraph 2": "",
        "Paragraph 3": "",
        "Summary": "",
        "Instruction 1": "",
        "Instruction 2": "",
        "Instruction 3": "",
    }

    markers = _init_markers()
    if markers is None:
        return paragraphs

    for key, start, end in markers["sections"]:
        paragraphs[key] = get_content_between_a_b(start, end, response)

    lines = response.splitlines()
    # The content of Instruction 3 may share a line with its label or sit on
    # the following line; either way it is on the last line with content.
    paragraphs['Instruction 3'] = _last_line_content(
        lines, markers["last_labels"])

    # Sometimes the response includes a chapter outline, sometimes not.
    # NOTE(review): the zh branch also looks for the English word 'Chapter',
    # as the original code did - confirm against the Chinese prompt.
    if any(line.startswith('Chapter') for line in lines):
        paragraphs['Outline'] = get_content_between_a_b(
            markers["outline_start"], 'Chapter', response)
    if paragraphs['Outline'] == '':
        paragraphs['Outline'] = get_content_between_a_b(
            markers["outline_start"], markers["outline_fallback_end"], response)
    return paragraphs
def get_chatgpt_response(model, prompt):
    """Send ``prompt`` to ``model`` and return the final streamed message.

    Every item yielded by ``model.ask`` is read and only the "message" of the
    last one is kept (an empty string if nothing is yielded). Afterwards the
    conversation is deleted and the chat is reset on ``model``.
    """
    last_message = ""
    stream = model.ask(prompt)
    for chunk in stream:
        last_message = chunk["message"]

    # Clean up so the next call starts from a fresh conversation.
    finished_conversation = model.conversation_id
    model.delete_conversation(finished_conversation)
    model.reset_chat()
    return last_message
def parse_instructions(instructions):
    """Format ``instructions`` as a numbered list, one item per line.

    Args:
        instructions: iterable of items to list; each is rendered with
            f-string formatting.

    Returns:
        A string of lines of the form "<n>. <instruction>\\n", numbered from
        1, or an empty string when there are no instructions.
    """
    return "".join(
        f"{number}. {instruction}\n"
        for number, instruction in enumerate(instructions, start=1)
    )