from transformers import MarianMTModel, MarianTokenizer

# Load translation models
en_to_hi_model_name = 'Helsinki-NLP/opus-mt-en-hi'  # English to Hindi
hi_to_en_model_name = 'Helsinki-NLP/opus-mt-hi-en'  # Hindi to English

# Load the models and tokenizers
en_to_hi_model = MarianMTModel.from_pretrained(en_to_hi_model_name)
en_to_hi_tokenizer = MarianTokenizer.from_pretrained(en_to_hi_model_name)
hi_to_en_model = MarianMTModel.from_pretrained(hi_to_en_model_name)
hi_to_en_tokenizer = MarianTokenizer.from_pretrained(hi_to_en_model_name)

# Function to perform back-translation (English -> Hindi -> English)
def back_translate(text, from_model, to_model, from_tokenizer, to_tokenizer):
    """
    Perform back-translation:
    1. Translate from English to Hindi
    2. Translate back from Hindi to English
    """
    # Step 1: Translate from English to Hindi with the en->hi model
    encoded = from_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    translated = from_model.generate(**encoded, num_beams=4, max_length=50, early_stopping=True)
    # Decode with the tokenizer of the model that produced the output
    hindi_text = from_tokenizer.decode(translated[0], skip_special_tokens=True)

    # Step 2: Translate back from Hindi to English with the hi->en model
    encoded_back = to_tokenizer(hindi_text, return_tensors="pt", truncation=True, padding=True)
    back_translated = to_model.generate(**encoded_back, num_beams=4, max_length=50, early_stopping=True)
    back_translated_text = to_tokenizer.decode(back_translated[0], skip_special_tokens=True)

    return back_translated_text

# Example usage:
original_text = "What is your address?"
back_translated_text = back_translate(original_text, en_to_hi_model, hi_to_en_model, en_to_hi_tokenizer, hi_to_en_tokenizer)
print("Original text:", original_text)
print("Back-translated text:", back_translated_text)