Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ def check_dependencies():
|
|
13 |
'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
|
14 |
'torch': ['torch'],
|
15 |
'sentencepiece': ['sentencepiece'],
|
16 |
-
'tf-keras': ['tf-keras']
|
17 |
}
|
18 |
|
19 |
installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
|
@@ -33,7 +33,7 @@ def check_dependencies():
|
|
33 |
check_dependencies()
|
34 |
|
35 |
import torch
|
36 |
-
from transformers import pipeline
|
37 |
import docx
|
38 |
import PyPDF2
|
39 |
import io
|
@@ -41,11 +41,18 @@ import io
|
|
41 |
class DocumentTranslator:
|
42 |
def __init__(self):
|
43 |
try:
|
44 |
-
# Initialize translation
|
45 |
-
self.
|
46 |
"translation",
|
47 |
model="Helsinki-NLP/opus-mt-en-ROMANCE",
|
48 |
-
framework="pt"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
)
|
50 |
|
51 |
# Supported languages
|
@@ -54,7 +61,8 @@ class DocumentTranslator:
|
|
54 |
"French": "fr",
|
55 |
"Spanish": "es",
|
56 |
"Portuguese": "pt",
|
57 |
-
"Italian": "it"
|
|
|
58 |
}
|
59 |
except Exception as e:
|
60 |
print(f"Error initializing translator: {str(e)}")
|
@@ -86,6 +94,13 @@ class DocumentTranslator:
|
|
86 |
doc.save(output_filename)
|
87 |
return output_filename
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
def translate_document(self, file, source_lang, target_lang):
|
90 |
try:
|
91 |
# Create temporary directory for output
|
@@ -107,7 +122,7 @@ class DocumentTranslator:
|
|
107 |
# Translate chunks
|
108 |
translated_chunks = []
|
109 |
for chunk in chunks:
|
110 |
-
translation = self.
|
111 |
translated_chunks.append(translation)
|
112 |
|
113 |
translated_text = " ".join(translated_chunks)
|
|
|
13 |
'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
|
14 |
'torch': ['torch'],
|
15 |
'sentencepiece': ['sentencepiece'],
|
16 |
+
'tf-keras': ['tf-keras']
|
17 |
}
|
18 |
|
19 |
installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
|
|
|
33 |
check_dependencies()
|
34 |
|
35 |
import torch
|
36 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
37 |
import docx
|
38 |
import PyPDF2
|
39 |
import io
|
|
|
41 |
class DocumentTranslator:
|
42 |
def __init__(self):
|
43 |
try:
|
44 |
+
# Initialize translation models
|
45 |
+
self.romance_translator = pipeline(
|
46 |
"translation",
|
47 |
model="Helsinki-NLP/opus-mt-en-ROMANCE",
|
48 |
+
framework="pt"
|
49 |
+
)
|
50 |
+
|
51 |
+
# Initialize Hindi translator
|
52 |
+
self.hindi_translator = pipeline(
|
53 |
+
"translation",
|
54 |
+
model="Helsinki-NLP/opus-mt-en-hi",
|
55 |
+
framework="pt"
|
56 |
)
|
57 |
|
58 |
# Supported languages
|
|
|
61 |
"French": "fr",
|
62 |
"Spanish": "es",
|
63 |
"Portuguese": "pt",
|
64 |
+
"Italian": "it",
|
65 |
+
"Hindi": "hi" # Added Hindi support
|
66 |
}
|
67 |
except Exception as e:
|
68 |
print(f"Error initializing translator: {str(e)}")
|
|
|
94 |
doc.save(output_filename)
|
95 |
return output_filename
|
96 |
|
97 |
+
def translate_text(self, text, target_lang):
    """Translate English ``text`` into the language given by ``target_lang``.

    Args:
        text: The source text chunk to translate.
        target_lang: Target language code as stored in ``self.languages``
            (e.g. "fr", "es", "pt", "it", "hi").

    Returns:
        The translated text, or ``text`` unchanged when it is empty or
        whitespace-only.
    """
    # Nothing to translate; also avoids sending a bare language token
    # to the multilingual model below.
    if not text or not text.strip():
        return text

    # Hindi has its own dedicated single-target model.
    if target_lang == "hi":
        return self.hindi_translator(text)[0]['translation_text']

    # opus-mt-en-ROMANCE is a multi-target model: the target language is
    # selected by a sentence-initial ">>id<<" token. Without it every
    # Romance language request would receive the same untargeted output.
    tagged_text = f">>{target_lang}<< {text}"
    return self.romance_translator(tagged_text)[0]['translation_text']
|
103 |
+
|
104 |
def translate_document(self, file, source_lang, target_lang):
|
105 |
try:
|
106 |
# Create temporary directory for output
|
|
|
122 |
# Translate chunks
|
123 |
translated_chunks = []
|
124 |
for chunk in chunks:
|
125 |
+
translation = self.translate_text(chunk, self.languages[target_lang])
|
126 |
translated_chunks.append(translation)
|
127 |
|
128 |
translated_text = " ".join(translated_chunks)
|