File size: 2,200 Bytes
88435ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from typing import TypedDict

from neollm import MyLLM
from neollm.types import Messages, OpenAIResponse
from neollm.utils.postprocess import strip_string
from neollm.utils.preprocess import optimize_token


class TranslatorInputType(TypedDict):
    """Input mapping accepted by ``Translator``."""

    # Text to be translated; ``Translator`` strips surrounding whitespace
    # before embedding it in the user prompt.
    text: str


class TranslatorOuputType(TypedDict):
    """Output mapping returned by ``Translator``."""

    # Translated text taken from the model reply; an empty string when the
    # input text was blank.
    text_translated: str


class Translator(MyLLM):
    """MyLLM that translates text between Japanese and English.

    The system prompt asks the model to translate Japanese into English or
    English into Japanese and to answer after an ``<output>`` marker. The
    reply is then handed to ``strip_string`` with ``<output>`` / ``<outputs>``
    as leading markers.

    Notes:
        inputs:
            >>> {"text": str}
        outputs:
            >>> {"text_translated": str}
    """

    # Prompt token count at or above which the 16k-context model is selected.
    _LONG_INPUT_TOKEN_THRESHOLD = 1600
    _DEFAULT_MODEL = "gpt-3.5-turbo"
    _LONG_INPUT_MODEL = "gpt-3.5-turbo-16k"

    def _preprocess(self, inputs: TranslatorInputType) -> Messages:
        """Build the system and user messages for a translation request.

        Args:
            inputs: Mapping whose ``"text"`` entry is the text to translate.

        Returns:
            A two-element message list (system prompt, then user prompt), each
            passed through ``optimize_token``.
        """
        system_prompt = (
            "You are a good translator. Translate Japanese into English or English into Japanese.\n"
            "# output_format:\n<output>\n{translated text in English or Japanese}"
        )
        # The text is wrapped in triple single quotes to delimit it in the prompt.
        source_text = inputs["text"].strip()
        user_prompt = f"<input>\n'''{source_text}'''"
        messages: Messages = [
            {"role": "system", "content": optimize_token(system_prompt)},
            {"role": "user", "content": optimize_token(user_prompt)},
        ]
        return messages

    def _ruleprocess(self, inputs: TranslatorInputType) -> None | TranslatorOuputType:
        """Short-circuit blank input without consulting the model.

        Args:
            inputs: Mapping whose ``"text"`` entry is the text to translate.

        Returns:
            ``{"text_translated": ""}`` when the text is empty or whitespace
            only, otherwise ``None``.
        """
        if not inputs["text"].strip():
            return {"text_translated": ""}
        return None

    def _update_settings(self) -> None:
        """Select the model according to the size of the prepared messages.

        Does nothing when ``self.messages`` is ``None``. Otherwise the
        16k-context model is used once the token count reaches the threshold,
        and the default model below it.
        """
        if self.messages is None:
            return
        if self.llm.count_tokens(self.messages) >= self._LONG_INPUT_TOKEN_THRESHOLD:
            self.model = self._LONG_INPUT_MODEL
        else:
            self.model = self._DEFAULT_MODEL

    def _postprocess(self, response: OpenAIResponse) -> TranslatorOuputType:
        """Extract the translated text from the first choice of the response.

        Args:
            response: Chat completion response returned by the model.

        Returns:
            ``{"text_translated": ...}`` holding the cleaned reply, or an empty
            string when the reply carries no content.
        """
        content = response.choices[0].message["content"]
        if content is None:
            # str(None) would surface the literal text "None" as a translation.
            return {"text_translated": ""}
        text_translated = strip_string(text=str(content), first_character=["<output>", "<outputs>"])
        outputs: TranslatorOuputType = {"text_translated": text_translated}
        return outputs

    # Overridden only to give callers precise input/output types.
    def __call__(self, inputs: TranslatorInputType) -> TranslatorOuputType:
        """Run the translation for ``inputs`` via ``MyLLM.__call__``.

        Args:
            inputs: Mapping whose ``"text"`` entry is the text to translate.

        Returns:
            Mapping whose ``"text_translated"`` entry is the translation.
        """
        outputs: TranslatorOuputType = super().__call__(inputs)
        return outputs