ketanchaudhary88 committed on
Commit
600489b
·
verified ·
1 Parent(s): 346cc84

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MarianMTModel, MarianTokenizer
2
+
3
# Hugging Face Hub checkpoint identifiers, one per translation direction.
en_to_hi_model_name = 'Helsinki-NLP/opus-mt-en-hi'  # English -> Hindi
hi_to_en_model_name = 'Helsinki-NLP/opus-mt-hi-en'  # Hindi -> English

# English -> Hindi: model plus the tokenizer from the same checkpoint.
en_to_hi_model = MarianMTModel.from_pretrained(en_to_hi_model_name)
en_to_hi_tokenizer = MarianTokenizer.from_pretrained(en_to_hi_model_name)

# Hindi -> English: model plus the tokenizer from the same checkpoint.
hi_to_en_model = MarianMTModel.from_pretrained(hi_to_en_model_name)
hi_to_en_tokenizer = MarianTokenizer.from_pretrained(hi_to_en_model_name)
13
+
14
+ # Function to perform back-translation (English -> Hindi -> English)
15
def back_translate(text, from_model, to_model, from_tokenizer, to_tokenizer):
    """Back-translate ``text`` through a pivot language and return the result.

    The text is translated by ``from_model`` into the pivot language and then
    translated back by ``to_model`` (English -> Hindi -> English for the
    models loaded in this file).

    Args:
        text: Source-language string to back-translate.
        from_model: Model translating source -> pivot; must expose
            ``generate``.
        to_model: Model translating pivot -> source; must expose ``generate``.
        from_tokenizer: Tokenizer paired with ``from_model``; must expose
            ``encode`` and ``decode``.
        to_tokenizer: Tokenizer paired with ``to_model``; must expose
            ``encode`` and ``decode``.

    Returns:
        The back-translated string in the source language.
    """
    # Step 1: source -> pivot.
    encoded = from_tokenizer.encode(
        text, return_tensors="pt", truncation=True, padding=True
    )
    translated = from_model.generate(
        encoded, num_beams=4, max_length=50, early_stopping=True
    )
    # Ids produced by a model belong to that model's own vocabulary, so they
    # are decoded with the tokenizer paired with from_model.
    pivot_text = from_tokenizer.decode(translated[0], skip_special_tokens=True)

    # Step 2: pivot -> source, using only the objects passed in by the caller
    # (no reliance on module-level models or tokenizers).
    encoded_back = to_tokenizer.encode(
        pivot_text, return_tensors="pt", truncation=True, padding=True
    )
    back_translated = to_model.generate(
        encoded_back, num_beams=4, max_length=50, early_stopping=True
    )
    back_translated_text = to_tokenizer.decode(
        back_translated[0], skip_special_tokens=True
    )

    return back_translated_text
32
+
33
# Demo: send one English sentence through Hindi and back, then show both.
original_text = "What is your address?"
back_translated_text = back_translate(
    original_text,
    from_model=en_to_hi_model,
    to_model=hi_to_en_model,
    from_tokenizer=en_to_hi_tokenizer,
    to_tokenizer=hi_to_en_tokenizer,
)
print("Original text:", original_text)
print("Back-translated text:", back_translated_text)