File size: 1,044 Bytes
f4abbca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from resources import set_start, audit_elapsedtime
from pydantic import BaseModel

#def get_model_name(languageCode: str) -> str:
#    match languageCode:
#        case "pt":
#            model_name = "Helsinki-NLP/opus-mt-pt-en"
#        case _:
#            model_name
#    
#    return model_name

# Hugging Face model id used for Portuguese -> English translation.
_PTEN_MODEL_NAME = "unicamp-dl/translation-pt-en-t5"

# Lazily-built pipeline, kept at module level so the tokenizer and model
# are loaded once instead of on every translate() call.
_pten_pipeline_cache = None


def _get_pten_pipeline():
    """Return the text2text-generation pipeline, building it on first use."""
    global _pten_pipeline_cache
    if _pten_pipeline_cache is None:
        print("Initiating translation model...")
        tokenizer = AutoTokenizer.from_pretrained(_PTEN_MODEL_NAME)
        model = AutoModelForSeq2SeqLM.from_pretrained(_PTEN_MODEL_NAME)
        _pten_pipeline_cache = pipeline(
            'text2text-generation', model=model, tokenizer=tokenizer
        )
    return _pten_pipeline_cache


def translate(text_to_translate: str) -> str:
    """Translate text with the ``unicamp-dl/translation-pt-en-t5`` model.

    The generation budget (``max_new_tokens``) is set to twice the number of
    characters in the input. The elapsed time is reported through
    ``audit_elapsedtime`` and the translated text is printed before it is
    returned.

    Args:
        text_to_translate: Text to feed to the translation pipeline.

    Returns:
        The ``generated_text`` of the first pipeline result, or ``""`` when
        the input is empty.
    """
    # An empty input would yield max_new_tokens == 0, which is not a usable
    # generation budget, so skip the model entirely.
    if not text_to_translate:
        return ""

    start = set_start()
    max_new_tokens = len(text_to_translate) * 2

    # NOTE(review): the model card for this checkpoint appears to prefix the
    # input with "translate Portuguese to English: " -- confirm whether the
    # prefix should be added here.
    pten_pipeline = _get_pten_pipeline()
    results = pten_pipeline(text_to_translate, max_new_tokens=max_new_tokens)
    translated_text = results[0]['generated_text']

    # Called for its reporting side effect; the return value is not needed.
    audit_elapsedtime(function="Finished translation", start=start)
    print("Translated text:", translated_text)
    return translated_text