Zeimoto committed on
Commit
0536b78
·
1 Parent(s): 3e48293

new portuguese model

Browse files
Files changed (1) hide show
  1. translation.py +10 -12
translation.py CHANGED
@@ -1,21 +1,19 @@
1
  from transformers import MarianMTModel, MarianTokenizer
 
2
 
def get_model_name(languageCode: str) -> str:
    """Return the Hugging Face model id used for translation.

    Args:
        languageCode: Source-language code. It is accepted for interface
            compatibility but not consulted: the same Portuguese-to-English
            checkpoint id is returned for every value.

    Returns:
        The model id string ``"Helsinki-NLP/opus-mt-pt-en"``.
    """
    return "Helsinki-NLP/opus-mt-pt-en"
6
 
# Loaded (model, tokenizer) pairs keyed by model id, so the weights are
# only instantiated once per process instead of once per translation.
_MARIAN_MODEL_CACHE = {}


def init_translation_model():
    """Return the Marian model and tokenizer for the "pt" model id.

    The model id comes from ``get_model_name("pt")`` and is printed on
    every call. The first call loads the model and tokenizer with
    ``from_pretrained``; later calls return the same cached objects.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model_name = get_model_name("pt")
    print(model_name)
    if model_name not in _MARIAN_MODEL_CACHE:
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        _MARIAN_MODEL_CACHE[model_name] = (model, tokenizer)
    return _MARIAN_MODEL_CACHE[model_name]
13
-
def get_translation(text_to_translate: str, languageCode: str) -> str:
    """Translate ``text_to_translate`` with the Marian model and return it.

    Args:
        text_to_translate: Text handed to the tokenizer (truncated and
            padded by the tokenizer call).
        languageCode: Accepted for interface compatibility; not used here,
            the model is whatever ``init_translation_model()`` returns.

    Returns:
        The decoded first generated sequence, with special tokens removed.
        The same string is also printed with a "Translated text:" prefix.
    """
    marian_model, marian_tokenizer = init_translation_model()
    encoded = marian_tokenizer(
        text_to_translate,
        return_tensors="pt",
        truncation=True,
        padding=True,
    )
    generated = marian_model.generate(**encoded)
    # Only the first generated sequence is decoded.
    first_sequence = generated[0]
    result = marian_tokenizer.decode(first_sequence, skip_special_tokens=True)

    print("Translated text:", result)
    return result
 
1
  from transformers import MarianMTModel, MarianTokenizer
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
 
# Supported source-language codes and the model id used for each.
_MODEL_NAME_BY_LANGUAGE = {
    "pt": "Helsinki-NLP/opus-mt-pt-en",
}


def get_model_name(languageCode: str) -> str:
    """Return the Hugging Face model id registered for ``languageCode``.

    Args:
        languageCode: Source-language code, e.g. ``"pt"``.

    Returns:
        The model id string for that language.

    Raises:
        ValueError: If no model is registered for ``languageCode``.
            (Previously an unsupported code fell through the ``match`` and
            failed with ``UnboundLocalError`` on the ``return``.)
    """
    try:
        return _MODEL_NAME_BY_LANGUAGE[languageCode]
    except (KeyError, TypeError):
        supported = ", ".join(sorted(_MODEL_NAME_BY_LANGUAGE))
        raise ValueError(
            f"Unsupported language code {languageCode!r}; "
            f"supported codes: {supported}"
        ) from None
10
 
 
 
 
 
 
 
 
_PTEN_MODEL_ID = "unicamp-dl/translation-pt-en-t5"

# Built pipelines keyed by model id, so the tokenizer and weights are
# loaded once per process rather than on every translation request.
_PIPELINE_CACHE = {}


def _get_pten_pipeline():
    """Return the cached text2text-generation pipeline, building it on first use."""
    if _PTEN_MODEL_ID not in _PIPELINE_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(_PTEN_MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(_PTEN_MODEL_ID)
        _PIPELINE_CACHE[_PTEN_MODEL_ID] = pipeline(
            'text2text-generation', model=model, tokenizer=tokenizer
        )
    return _PIPELINE_CACHE[_PTEN_MODEL_ID]


def get_translation(text_to_translate: str, languageCode: str) -> str:
    """Translate ``text_to_translate`` and return the translated string.

    Args:
        text_to_translate: Text to run through the pipeline.
        languageCode: Accepted for interface compatibility; currently not
            used, the ``unicamp-dl/translation-pt-en-t5`` checkpoint is
            always the one loaded.

    Returns:
        The generated text of the first pipeline result, or ``""`` when the
        input is empty or only whitespace (the model is not loaded then).
        Non-empty results are also printed with a "Translated text:" prefix.
    """
    if not text_to_translate or not text_to_translate.strip():
        return ""

    # NOTE(review): this checkpoint may expect a task prefix such as
    # "translate Portuguese to English: " in front of the input -- confirm
    # against the model card before adding it; input is passed through as-is.
    outputs = _get_pten_pipeline()(text_to_translate)

    # The pipeline yields a list of {"generated_text": ...} dicts; callers
    # are promised a plain string, so unwrap the first result.
    translated_text = outputs[0]["generated_text"]

    print("Translated text:", translated_text)
    return translated_text