Spaces:

ketanchaudhary88
/

DataAugmentation

Runtime error

App Files Community

ketanchaudhary88 committed on Nov 16, 2024

Commit

1bb47ca

verified ·

1 Parent(s): acc28aa

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -32

app.py CHANGED Viewed

@@ -1,37 +1,22 @@
-from transformers import MarianMTModel, MarianTokenizer
-# Load translation models
-en_to_hi_model_name = 'Helsinki-NLP/opus-mt-en-hi'  # English to Hindi
-hi_to_en_model_name = 'Helsinki-NLP/opus-mt-hi-en'  # Hindi to English
-# Load the models and tokenizers
-en_to_hi_model = MarianMTModel.from_pretrained(en_to_hi_model_name)
-en_to_hi_tokenizer = MarianTokenizer.from_pretrained(en_to_hi_model_name)
-hi_to_en_model = MarianMTModel.from_pretrained(hi_to_en_model_name)
-hi_to_en_tokenizer = MarianTokenizer.from_pretrained(hi_to_en_model_name)
-# Function to perform back-translation (English -> Hindi -> English)
-def back_translate(text, from_model, to_model, from_tokenizer, to_tokenizer):
     """
-    Perform back translation:
-    1. Translate from English to Hindi
-    2. Translate back from Hindi to English
     """
-    # Step 1: Translate from English to Hindi
-    encoded = from_tokenizer.encode(text, return_tensors="pt", truncation=True, padding=True)
-    translated = from_model.generate(encoded, num_beams=4, max_length=50, early_stopping=True)
-    hindi_text = to_tokenizer.decode(translated[0], skip_special_tokens=True)
-    # Step 2: Translate back from Hindi to English
-    encoded_back = to_tokenizer.encode(hindi_text, return_tensors="pt", truncation=True, padding=True)
-    back_translated = hi_to_en_model.generate(encoded_back, num_beams=4, max_length=50, early_stopping=True)
-    back_translated_text = en_to_hi_tokenizer.decode(back_translated[0], skip_special_tokens=True)
-    return back_translated_text
-# Example usage:
-original_text = "What is your address?"
-back_translated_text = back_translate(original_text, en_to_hi_model, hi_to_en_model, en_to_hi_tokenizer, hi_to_en_tokenizer)
-print("Original text:", original_text)
-print("Back-translated text:", back_translated_text)

+import nlpaug.augmenter.word as naw
+import nlpaug.augmenter.translator as nat
+# Initialize augmenters
+synonym_aug = naw.SynonymAug(aug_src='en', lang='eng')
+back_translate_en_to_hi = nat.BackTranslationAug(from_model_name='Helsinki-NLP/opus-mt-en-hi', to_model_name='Helsinki-NLP/opus-mt-hi-en')
+def augment_text(text, augmentation_type='synonym'):
     """
+    Augment text based on the specified type.
+    augmentation_type: 'synonym' for synonym replacement, 'back_translation' for back translation
     """
+    if augmentation_type == 'synonym':
+        return synonym_aug.augment(text)
+    elif augmentation_type == 'back_translation':
+        return back_translate_en_to_hi.augment(text)
+    else:
+        return text
+# Test augmentation functions
+print(augment_text("What is your address?", augmentation_type='synonym'))  # Synonym
+print(augment_text("What is your address?", augmentation_type='back_translation'))  # Back translation