ketanchaudhary88 committed on
Commit
1bb47ca
·
verified ·
1 Parent(s): acc28aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -32
app.py CHANGED
@@ -1,37 +1,22 @@
1
- from transformers import MarianMTModel, MarianTokenizer
 
2
 
3
# Hugging Face checkpoint identifiers, one per translation direction.
en_to_hi_model_name = "Helsinki-NLP/opus-mt-en-hi"  # English -> Hindi
hi_to_en_model_name = "Helsinki-NLP/opus-mt-hi-en"  # Hindi -> English

# English -> Hindi: model and the tokenizer that belongs to it.
en_to_hi_model = MarianMTModel.from_pretrained(en_to_hi_model_name)
en_to_hi_tokenizer = MarianTokenizer.from_pretrained(en_to_hi_model_name)

# Hindi -> English: model and the tokenizer that belongs to it.
hi_to_en_model = MarianMTModel.from_pretrained(hi_to_en_model_name)
hi_to_en_tokenizer = MarianTokenizer.from_pretrained(hi_to_en_model_name)
13
-
14
- # Function to perform back-translation (English -> Hindi -> English)
15
def back_translate(text, from_model, to_model, from_tokenizer, to_tokenizer,
                   *, num_beams=4, max_length=50):
    """
    Paraphrase ``text`` by translating it to a pivot language and back.

    Perform back translation:
    1. Translate from the source language to the pivot language
       (e.g. English to Hindi) with ``from_model``.
    2. Translate the pivot text back to the source language
       (e.g. Hindi to English) with ``to_model``.

    Only the objects passed as arguments are used; the function does not
    depend on any module-level model or tokenizer.

    Args:
        text: The sentence to back-translate.
        from_model: Source -> pivot translation model exposing ``generate``.
        to_model: Pivot -> source translation model exposing ``generate``.
        from_tokenizer: Tokenizer paired with ``from_model``.
        to_tokenizer: Tokenizer paired with ``to_model``.
        num_beams: Beam width forwarded to both ``generate`` calls.
        max_length: Maximum generated length forwarded to both
            ``generate`` calls.

    Returns:
        The decoded round-trip translation of ``text``.
    """
    generation_options = {
        "num_beams": num_beams,
        "max_length": max_length,
        "early_stopping": True,
    }

    # Step 1: source -> pivot.
    encoded = from_tokenizer.encode(text, return_tensors="pt", truncation=True, padding=True)
    translated = from_model.generate(encoded, **generation_options)
    # The ids produced by from_model live in from_tokenizer's vocabulary, so
    # they must be decoded with that same tokenizer.
    pivot_text = from_tokenizer.decode(translated[0], skip_special_tokens=True)

    # Step 2: pivot -> source, using the model that was passed in rather than
    # a module-level global.
    encoded_back = to_tokenizer.encode(pivot_text, return_tensors="pt", truncation=True, padding=True)
    back_translated = to_model.generate(encoded_back, **generation_options)
    return to_tokenizer.decode(back_translated[0], skip_special_tokens=True)
32
 
33
# Demo: send one English sentence through Hindi and back, then show both.
original_text = "What is your address?"
back_translated_text = back_translate(
    original_text,
    en_to_hi_model,
    hi_to_en_model,
    en_to_hi_tokenizer,
    hi_to_en_tokenizer,
)
print("Original text:", original_text)
print("Back-translated text:", back_translated_text)
 
1
# BackTranslationAug is one of nlpaug's *word* augmenters; there is no
# ``nlpaug.augmenter.translator`` module, so both augmenters come from ``naw``.
import nlpaug.augmenter.word as naw

# Initialize augmenters
# SynonymAug accepts only 'wordnet' or 'ppdb' as ``aug_src``; the language is
# chosen separately through ``lang``.
synonym_aug = naw.SynonymAug(aug_src='wordnet', lang='eng')
# English -> Hindi -> English round trip.
back_translate_en_to_hi = naw.BackTranslationAug(
    from_model_name='Helsinki-NLP/opus-mt-en-hi',
    to_model_name='Helsinki-NLP/opus-mt-hi-en',
)
7
 
8
def augment_text(text, augmentation_type='synonym'):
    """
    Return an augmented variant of ``text``.

    ``augmentation_type`` selects the augmenter: ``'synonym'`` delegates to
    the module-level ``synonym_aug`` and ``'back_translation'`` delegates to
    ``back_translate_en_to_hi``.  Any other value returns ``text`` unchanged.
    """
    if augmentation_type == 'synonym':
        augmenter = synonym_aug
    elif augmentation_type == 'back_translation':
        augmenter = back_translate_en_to_hi
    else:
        # Unrecognised mode: pass the input straight through.
        return text
    return augmenter.augment(text)
 
 
 
 
 
19
 
20
# Smoke-test both augmentation modes on the same sample sentence.
print(
    augment_text("What is your address?", augmentation_type='synonym')
)  # Synonym replacement
print(
    augment_text("What is your address?", augmentation_type='back_translation')
)  # Back translation