Spaces:

akarshan11
/

garrry

Running

App Files Files Community

akarshan11 committed on Feb 19

Commit

e1983d6

verified ·

1 Parent(s): 2111187

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -7

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ def check_dependencies():
         'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
         'torch': ['torch'],
         'sentencepiece': ['sentencepiece'],
-        'tf-keras': ['tf-keras']  # Added tf-keras as a required package
     }
     installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
@@ -33,7 +33,7 @@ def check_dependencies():
 check_dependencies()
 import torch
-from transformers import pipeline
 import docx
 import PyPDF2
 import io
@@ -41,11 +41,18 @@ import io
 class DocumentTranslator:
     def __init__(self):
         try:
-            # Initialize translation model with PyTorch backend explicitly
-            self.translator = pipeline(
                 "translation",
                 model="Helsinki-NLP/opus-mt-en-ROMANCE",
-                framework="pt"  # Explicitly specify PyTorch as the backend
             )
             # Supported languages
@@ -54,7 +61,8 @@ class DocumentTranslator:
                 "French": "fr",
                 "Spanish": "es",
                 "Portuguese": "pt",
-                "Italian": "it"
             }
         except Exception as e:
             print(f"Error initializing translator: {str(e)}")
@@ -86,6 +94,13 @@ class DocumentTranslator:
         doc.save(output_filename)
         return output_filename
     def translate_document(self, file, source_lang, target_lang):
         try:
             # Create temporary directory for output
@@ -107,7 +122,7 @@ class DocumentTranslator:
             # Translate chunks
             translated_chunks = []
             for chunk in chunks:
-                translation = self.translator(chunk)[0]['translation_text']
                 translated_chunks.append(translation)
             translated_text = " ".join(translated_chunks)

         'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
         'torch': ['torch'],
         'sentencepiece': ['sentencepiece'],
+        'tf-keras': ['tf-keras']
     }
     installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
 check_dependencies()
 import torch
+from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 import docx
 import PyPDF2
 import io
 class DocumentTranslator:
     def __init__(self):
         try:
+            # Initialize translation models
+            self.romance_translator = pipeline(
                 "translation",
                 model="Helsinki-NLP/opus-mt-en-ROMANCE",
+                framework="pt"
+            )
+            # Initialize Hindi translator
+            self.hindi_translator = pipeline(
+                "translation",
+                model="Helsinki-NLP/opus-mt-en-hi",
+                framework="pt"
             )
             # Supported languages
                 "French": "fr",
                 "Spanish": "es",
                 "Portuguese": "pt",
+                "Italian": "it",
+                "Hindi": "hi"  # Added Hindi support
             }
         except Exception as e:
             print(f"Error initializing translator: {str(e)}")
         doc.save(output_filename)
         return output_filename
+    def translate_text(self, text, target_lang):
+        # Choose appropriate translator based on target language
+        if target_lang == "hi":
+            return self.hindi_translator(text)[0]['translation_text']
+        else:
+            return self.romance_translator(text)[0]['translation_text']
     def translate_document(self, file, source_lang, target_lang):
         try:
             # Create temporary directory for output
             # Translate chunks
             translated_chunks = []
             for chunk in chunks:
+                translation = self.translate_text(chunk, self.languages[target_lang])
                 translated_chunks.append(translation)
             translated_text = " ".join(translated_chunks)