from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from flores200_codes import flores_codes
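# Assumption: flores_codes maps human-readable language names (e.g. "English",
# "Shan") to their FLORES-200 codes (e.g. "eng_Latn", "shn_Mymr").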

# Cache of loaded models and tokenizers, keyed by
# "<model_name>_model" / "<model_name>_tokenizer".
model_dict = {}


def load_models(model_name: str):
    # Build the model and tokenizer; only the distilled 1.3B NLLB-200
    # checkpoint is exposed here.
    model_name_dict = {
        "nllb-distilled-1.3B": "facebook/nllb-200-distilled-1.3B",
    }[model_name]

    # Load once and reuse the cached copy on subsequent calls.
    if model_name + "_model" not in model_dict:
        print(f"\tLoading model: {model_name}")
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name_dict)
        tokenizer = AutoTokenizer.from_pretrained(model_name_dict)
        model_dict[model_name + "_model"] = model
        model_dict[model_name + "_tokenizer"] = tokenizer

    return model_dict


def translation(model_name: str, source: str, target: str, text: str):
    # Fetch the (cached) model and tokenizer for the requested checkpoint.
    model_dict = load_models(model_name)

    # Map the display language names to FLORES-200 codes.
    source = flores_codes[source]
    target = flores_codes[target]

    model = model_dict[model_name + "_model"]
    tokenizer = model_dict[model_name + "_tokenizer"]

    # Build a translation pipeline for this language pair and run it.
    translator = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=source,
        tgt_lang=target,
    )
    output = translator(text, max_length=400)

    output = output[0]["translation_text"]
    result = {
        "result": output,
    }

    return result

'''NLLB_EXAMPLES = [
    ["nllb-distilled-1.3B", "English", "Shan", "Hello, how are you today?"],
    ["nllb-distilled-1.3B", "Shan", "English", "မႂ်ႇသုင်ၶႃႈ ယူႇလီယူႇၶႃႈၼေႃႈ"],
    [
        "nllb-distilled-1.3B",
        "English",
        "Shan",
        "Forming Myanmar’s New Political System Will Remain an Ideal but Never in Practicality",
    ],
]
'''
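
# Minimal usage sketch. This guard is illustrative, not part of the deployed
# app; it assumes "English" and "Shan" are keys in flores_codes, matching the
# commented-out NLLB_EXAMPLES above.
if __name__ == "__main__":
    demo = translation(
        "nllb-distilled-1.3B",
        "English",
        "Shan",
        "Hello, how are you today?",
    )
    print(demo["result"])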