Spaces:

zxsipola123456
/

article

Runtime error

File size: 7,429 Bytes

591004d

#!python
# -*- coding: utf-8 -*-
# @author: Kun

import re
from global_config import lang_opt, llm_model_opt

# Bind get_api_response to the backend selected by llm_model_opt. Every branch
# imports the same name from a different utils module, so the rest of this file
# calls get_api_response without knowing which model is active.
if "openai" == llm_model_opt:
    from utils.openai_util import get_api_response
elif "vicuna" == llm_model_opt:
    from utils.vicuna_util import get_api_response
elif "chatglm" == llm_model_opt:
    from utils.chatglm_util import get_api_response
elif "baichuan" == llm_model_opt:
    from utils.baichuan_util import get_api_response
elif "aquila" == llm_model_opt:
    from utils.aquila_util import get_api_response
elif "falcon" == llm_model_opt:
    from utils.falcon_util import get_api_response
else:
    # An unrecognised model name aborts the import of this module.
    raise Exception("not supported llm model name: {}".format(llm_model_opt))


def _search_between(start, end, text):
    """Run the model-specific regex that captures the text following `start`.

    The pattern used for "openai" requires `end` to come directly after a
    newline; the pattern used for every other supported model allows
    arbitrary text between that newline and `end`. Returns the match object
    or None. Raises Exception when llm_model_opt is not a supported model.
    """
    if llm_model_opt == "openai":
        pattern = f"{start}(.*?)\n{end}"
    elif llm_model_opt in ("vicuna", "chatglm", "baichuan", "aquila", "falcon"):
        pattern = f"{start}(.*?)\n(.*?){end}"
    else:
        raise Exception(
            "not supported llm model name: {}".format(llm_model_opt))
    return re.search(pattern, text, re.DOTALL)


def get_content_between_a_b(a, b, text):
    """Return the stripped text captured between markers `a` and `b` in `text`.

    `a` and `b` are interpolated into a regular expression without escaping.

    With lang_opt == "en" a missing match is not handled: `.group` is called
    on None and an AttributeError propagates to the caller.

    With lang_opt in ("zh1", "zh2") a miss triggers one retry in which spaces
    are removed from any marker containing the digit 1, 2 or 3; if the retry
    also misses, a fixed fallback message is returned instead of raising.

    Raises Exception for an unsupported lang_opt or llm_model_opt.
    """
    english = "en" == lang_opt
    if not english and lang_opt not in ["zh1", "zh2"]:
        raise Exception(f"not supported language: {lang_opt}")

    found = _search_between(a, b, text)
    if english:
        return found.group(1).strip()

    if found is not None:
        return found.group(1).strip()

    # Retry with the spaces removed from numbered markers.
    if any(digit in a for digit in ("1", "2", "3")):
        a = a.replace(" ", "")
    if any(digit in b for digit in ("1", "2", "3")):
        b = b.replace(" ", "")

    found = _search_between(a, b, text)
    if found is not None:
        return found.group(1).strip()

    # Handle the case where no matching content is found: return a default
    # message (another default value or handling strategy could be used).
    return "翻译时出现错误请重试"


def get_init(init_text=None, text=None, response_file=None, model=None, tokenizer=None):
    """
    Obtain the initial model output and split it into its labelled sections.

    init_text: path of a .txt file that already holds the title, outline and
        the first 3 paragraphs; when given, it is read instead of querying
        the model.
    text: init prompt passed to get_api_response when init_text is not given.
    response_file: optional path; a freshly generated response is appended to
        it (nothing is written when the response comes from init_text).
    model, tokenizer: forwarded unchanged to get_api_response.

    Returns a dict with the keys "name", "Outline", "Paragraph 1".."Paragraph 3",
    "Summary" and "Instruction 1".."Instruction 3". Every value stays "" when
    lang_opt is neither "en" nor "zh1"/"zh2".
    """
    if not init_text:
        response = get_api_response(model, tokenizer, text)
        print("response: {}".format(response))

        if response_file:
            with open(response_file, 'a', encoding='utf-8') as f:
                f.write(f"Init output here:\n{response}\n\n")
    else:
        # The with-block closes the file; no explicit close() is needed.
        with open(init_text, 'r', encoding='utf-8') as f:
            response = f.read()

    paragraphs = {
        "name": "",
        "Outline": "",
        "Paragraph 1": "",
        "Paragraph 2": "",
        "Paragraph 3": "",
        "Summary": "",
        "Instruction 1": "",
        "Instruction 2": "",
        "Instruction 3": ""
    }

    # Per language: (result key, start marker, end marker) for each section,
    # the outline markers, and the labels that may prefix the last line.
    if "en" == lang_opt:
        section_markers = [
            ('name', 'Name:', 'Outline'),
            ('Paragraph 1', 'Paragraph 1:', 'Paragraph 2:'),
            ('Paragraph 2', 'Paragraph 2:', 'Paragraph 3:'),
            ('Paragraph 3', 'Paragraph 3:', 'Summary'),
            ('Summary', 'Summary:', 'Instruction 1'),
            ('Instruction 1', 'Instruction 1:', 'Instruction 2'),
            ('Instruction 2', 'Instruction 2:', 'Instruction 3'),
        ]
        outline_start, outline_end = 'Outline:', 'Paragraph'
        last_line_labels = ['Instruction 3:']
    elif lang_opt in ["zh1", "zh2"]:
        section_markers = [
            ('name', '名称：', '概述：'),
            ('Paragraph 1', '段落 1：', '段落 2：'),
            ('Paragraph 2', '段落 2：', '段落 3：'),
            ('Paragraph 3', '段落 3：', '总结：'),
            ('Summary', '总结：', '指令 1'),
            ('Instruction 1', '指令 1：', '指令 2：'),
            ('Instruction 2', '指令 2：', '指令 3：'),
        ]
        outline_start, outline_end = '概述：', '段落'
        # The Chinese label (with or without the space) is what a Chinese
        # response carries; the English label is still accepted so responses
        # that were handled before keep working.
        last_line_labels = ['指令 3：', '指令3：', 'Instruction 3:']
    else:
        return paragraphs

    for key, start, end in section_markers:
        paragraphs[key] = get_content_between_a_b(start, end, response)

    lines = response.splitlines()
    # content of Instruction 3 may be in the same line with I3 or in the next line
    if lines:  # an empty response has no last line to read
        last_line = lines[-1]
        paragraphs['Instruction 3'] = last_line
        for label in last_line_labels:
            # Match on the label without its colon, then drop label + colon.
            if last_line.startswith(label[:-1]):
                paragraphs['Instruction 3'] = last_line[len(label):]
                break

    # Sometimes it gives Chapter outline, sometimes it doesn't
    if any(line.startswith('Chapter') for line in lines):
        paragraphs['Outline'] = get_content_between_a_b(
            outline_start, 'Chapter', response)
    if paragraphs['Outline'] == '':
        paragraphs['Outline'] = get_content_between_a_b(
            outline_start, outline_end, response)

    return paragraphs


def get_chatgpt_response(model, prompt):
    """Send `prompt` through `model.ask` and return the last "message" seen.

    Every item yielded by `model.ask(prompt)` is indexed with "message"; only
    the value from the final item is kept ("" when nothing is yielded).
    Afterwards `model.delete_conversation(model.conversation_id)` and
    `model.reset_chat()` are called, in that order, before returning.
    """
    final_message = ""
    stream = model.ask(prompt)
    for chunk in stream:
        final_message = chunk["message"]

    # Presumably discards the finished conversation so the next call starts
    # clean -- confirm against the model client's documentation.
    conversation = model.conversation_id
    model.delete_conversation(conversation)
    model.reset_chat()
    return final_message


def parse_instructions(instructions):
    """Format `instructions` as a numbered list, one "N. text" line per item.

    Numbering starts at 1 and every line, including the last, ends with a
    newline. Any iterable is accepted; an empty one yields "".
    """
    return "".join(
        f"{number}. {instruction}\n"
        for number, instruction in enumerate(instructions, start=1)
    )