Spaces:
Running
Running
from nltk.tokenize import sent_tokenize | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import torch | |
import src.exception.Exception.Exception as ExceptionCustom | |
# Use a pipeline as a high-level helper | |
from transformers import pipeline | |
# Method label passed to ExceptionCustom.checkForException during validation.
METHOD = "TRANSLATE"

# Resolve the inference device up front: CUDA when torch reports it, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Romanian -> English pair (direction as indicated by the checkpoint name).
tokenizerROMENG = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-ro-en")
modelROMENG = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-ro-en")
modelROMENG.to(device)

# English -> Romanian pair (direction as indicated by the checkpoint name).
tokenizerENGROM = AutoTokenizer.from_pretrained("BlackKakapo/opus-mt-en-ro")
modelENGROM = AutoModelForSeq2SeqLM.from_pretrained("BlackKakapo/opus-mt-en-ro")
modelENGROM.to(device)
def paraphraseTranslateMethod(requestValue: str, model: str):
    """Translate ``requestValue`` one sentence at a time using sampled decoding.

    Args:
        requestValue: Text to translate. It is first validated with
            ``ExceptionCustom.checkForException`` and then split into
            sentences with ``sent_tokenize``.
        model: ``'roen'`` selects the ``tokenizerROMENG`` / ``modelROMENG``
            pair; any other value selects ``tokenizerENGROM`` /
            ``modelENGROM``.

    Returns:
        ``("", exception)`` when validation reports a problem; otherwise
        ``(translated_text, model)``, where ``translated_text`` is the
        per-sentence outputs joined with single spaces and stripped.
    """
    exception = ExceptionCustom.checkForException(requestValue, METHOD)
    if exception:
        return "", exception

    # The direction is fixed for the whole request, so pick the pair once
    # instead of re-testing `model` (and duplicating the call) per sentence.
    if model == 'roen':
        tokenizer, translator = tokenizerROMENG, modelROMENG
    else:
        tokenizer, translator = tokenizerENGROM, modelENGROM

    translated_sentences = []
    for sentence in sent_tokenize(requestValue):
        encoded = tokenizer(sentence, return_tensors='pt').to(device)
        output = translator.generate(
            input_ids=encoded.input_ids,
            # Forward the tokenizer's mask explicitly rather than leaving
            # generate() to infer one from the input ids.
            attention_mask=encoded.attention_mask,
            do_sample=True,
            max_length=512,
            top_k=90,
            top_p=0.97,
            early_stopping=False,
        )
        # One input sequence is generated at a time, so take the first decode.
        translated_sentences.append(
            tokenizer.batch_decode(output, skip_special_tokens=True)[0]
        )

    return " ".join(translated_sentences).strip(), model
# Text-generation pipelines already built by gemma(), keyed by model id, so a
# checkpoint is loaded once per process instead of once per call.
_GEMMA_PIPELINES = {}


def gemma(requestValue: str, model: str = 'Gargaz/gemma-2b-romanian-better'):
    """Ask a text-generation model for a formal Romanian translation.

    Args:
        requestValue: Text inserted verbatim into the translation prompt.
        model: Model identifier handed to ``transformers.pipeline``. It was
            previously ignored in favour of a hard-coded id equal to the
            default, so calls that omit it behave as before.

    Returns:
        Whatever the pipeline call returns for the single-message chat input;
        the result is passed through unmodified.
    """
    pipe = _GEMMA_PIPELINES.get(model)
    if pipe is None:
        # Building the pipeline loads the checkpoint, so do it only on first use.
        pipe = pipeline("text-generation", model=model)
        _GEMMA_PIPELINES[model] = pipe

    messages = [
        {"role": "user", "content": f"Translate the following text to Romanian using a formal tone and provide only translation: {requestValue}"},
    ]
    return pipe(messages)