# Source: Hugging Face Space (status shown when captured: "Runtime error").
from transformers import MarianMTModel, MarianTokenizer

# Checkpoint identifiers for the two translation directions.
en_to_hi_model_name = 'Helsinki-NLP/opus-mt-en-hi'  # English to Hindi
hi_to_en_model_name = 'Helsinki-NLP/opus-mt-hi-en'  # Hindi to English

# Load every model together with the tokenizer from the same checkpoint.
# This happens at import time, so importing this module triggers the loads.
en_to_hi_model = MarianMTModel.from_pretrained(en_to_hi_model_name)
en_to_hi_tokenizer = MarianTokenizer.from_pretrained(en_to_hi_model_name)
hi_to_en_model = MarianMTModel.from_pretrained(hi_to_en_model_name)
hi_to_en_tokenizer = MarianTokenizer.from_pretrained(hi_to_en_model_name)
# Back-translation helper (e.g. English -> Hindi -> English).
def back_translate(text, from_model, to_model, from_tokenizer, to_tokenizer,
                   *, num_beams=4, max_length=50):
    """Round-trip *text* through a pivot language and return the result.

    The text is translated with ``from_model`` and the intermediate
    translation is then translated back with ``to_model``. Each model is
    used only with its own tokenizer: ``from_tokenizer`` encodes the input
    and decodes the forward model's output, ``to_tokenizer`` encodes the
    intermediate text and decodes the reverse model's output.

    Args:
        text: Source-language string to back-translate.
        from_model: Forward translation model (source -> pivot).
        to_model: Reverse translation model (pivot -> source).
        from_tokenizer: Tokenizer paired with ``from_model``.
        to_tokenizer: Tokenizer paired with ``to_model``.
        num_beams: Beam width passed to both ``generate`` calls.
        max_length: ``max_length`` passed to both ``generate`` calls.

    Returns:
        The back-translated text as a string.
    """
    # Step 1: source -> pivot. Token ids produced by the forward model are
    # decoded with the forward tokenizer, not the reverse one.
    forward_ids = from_tokenizer.encode(
        text, return_tensors="pt", truncation=True, padding=True
    )
    pivot_ids = from_model.generate(
        forward_ids,
        num_beams=num_beams,
        max_length=max_length,
        early_stopping=True,
    )
    pivot_text = from_tokenizer.decode(pivot_ids[0], skip_special_tokens=True)

    # Step 2: pivot -> source. Use the model and tokenizer that were passed
    # in rather than reaching for module-level globals.
    reverse_ids = to_tokenizer.encode(
        pivot_text, return_tensors="pt", truncation=True, padding=True
    )
    round_trip_ids = to_model.generate(
        reverse_ids,
        num_beams=num_beams,
        max_length=max_length,
        early_stopping=True,
    )
    return to_tokenizer.decode(round_trip_ids[0], skip_special_tokens=True)
# Example usage: round-trip one English sentence through Hindi and print
# both the input and the back-translated result.
original_text = "What is your address?"
back_translated_text = back_translate(
    original_text,
    en_to_hi_model,
    hi_to_en_model,
    en_to_hi_tokenizer,
    hi_to_en_tokenizer,
)
print("Original text:", original_text)
print("Back-translated text:", back_translated_text)