akarshan11 committed on
Commit
e1983d6
·
verified ·
1 Parent(s): 2111187

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -7
app.py CHANGED
@@ -13,7 +13,7 @@ def check_dependencies():
13
  'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
14
  'torch': ['torch'],
15
  'sentencepiece': ['sentencepiece'],
16
- 'tf-keras': ['tf-keras'] # Added tf-keras as a required package
17
  }
18
 
19
  installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
@@ -33,7 +33,7 @@ def check_dependencies():
33
  check_dependencies()
34
 
35
  import torch
36
- from transformers import pipeline
37
  import docx
38
  import PyPDF2
39
  import io
@@ -41,11 +41,18 @@ import io
41
  class DocumentTranslator:
42
  def __init__(self):
43
  try:
44
- # Initialize translation model with PyTorch backend explicitly
45
- self.translator = pipeline(
46
  "translation",
47
  model="Helsinki-NLP/opus-mt-en-ROMANCE",
48
- framework="pt" # Explicitly specify PyTorch as the backend
 
 
 
 
 
 
 
49
  )
50
 
51
  # Supported languages
@@ -54,7 +61,8 @@ class DocumentTranslator:
54
  "French": "fr",
55
  "Spanish": "es",
56
  "Portuguese": "pt",
57
- "Italian": "it"
 
58
  }
59
  except Exception as e:
60
  print(f"Error initializing translator: {str(e)}")
@@ -86,6 +94,13 @@ class DocumentTranslator:
86
  doc.save(output_filename)
87
  return output_filename
88
 
 
 
 
 
 
 
 
89
  def translate_document(self, file, source_lang, target_lang):
90
  try:
91
  # Create temporary directory for output
@@ -107,7 +122,7 @@ class DocumentTranslator:
107
  # Translate chunks
108
  translated_chunks = []
109
  for chunk in chunks:
110
- translation = self.translator(chunk)[0]['translation_text']
111
  translated_chunks.append(translation)
112
 
113
  translated_text = " ".join(translated_chunks)
 
13
  'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
14
  'torch': ['torch'],
15
  'sentencepiece': ['sentencepiece'],
16
+ 'tf-keras': ['tf-keras']
17
  }
18
 
19
  installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
 
33
  check_dependencies()
34
 
35
  import torch
36
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
37
  import docx
38
  import PyPDF2
39
  import io
 
41
  class DocumentTranslator:
42
  def __init__(self):
43
  try:
44
+ # Initialize translation models
45
+ self.romance_translator = pipeline(
46
  "translation",
47
  model="Helsinki-NLP/opus-mt-en-ROMANCE",
48
+ framework="pt"
49
+ )
50
+
51
+ # Initialize Hindi translator
52
+ self.hindi_translator = pipeline(
53
+ "translation",
54
+ model="Helsinki-NLP/opus-mt-en-hi",
55
+ framework="pt"
56
  )
57
 
58
  # Supported languages
 
61
  "French": "fr",
62
  "Spanish": "es",
63
  "Portuguese": "pt",
64
+ "Italian": "it",
65
+ "Hindi": "hi" # Added Hindi support
66
  }
67
  except Exception as e:
68
  print(f"Error initializing translator: {str(e)}")
 
94
  doc.save(output_filename)
95
  return output_filename
96
 
97
+ def translate_text(self, text, target_lang):
98
+ # Choose appropriate translator based on target language
99
+ if target_lang == "hi":
100
+ return self.hindi_translator(text)[0]['translation_text']
101
+ else:
102
+ return self.romance_translator(text)[0]['translation_text']
103
+
104
  def translate_document(self, file, source_lang, target_lang):
105
  try:
106
  # Create temporary directory for output
 
122
  # Translate chunks
123
  translated_chunks = []
124
  for chunk in chunks:
125
+ translation = self.translate_text(chunk, self.languages[target_lang])
126
  translated_chunks.append(translation)
127
 
128
  translated_text = " ".join(translated_chunks)