akarshan11 committed on
Commit
9dcbee4
·
verified ·
1 Parent(s): 10b9c24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -159
app.py CHANGED
@@ -1,159 +1,159 @@
1
- import gradio as gr
2
- import sys
3
- import pkg_resources
4
- import tempfile
5
- import os
6
- from pathlib import Path
7
-
8
-
9
- def check_dependencies():
10
- required_packages = {
11
- 'gradio': ['gradio'],
12
- 'transformers': ['transformers'],
13
- 'python-docx': ['python-docx', 'python_docx', 'docx'],
14
- 'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
15
- 'torch': ['torch'],
16
- 'sentencepiece': ['sentencepiece']
17
- }
18
-
19
- installed = {pkg.key.lower() for pkg in pkg_resources.working_set}
20
- missing = []
21
-
22
- for package, variations in required_packages.items():
23
- if not any(variation.lower() in installed for variation in variations):
24
- missing.append(package)
25
-
26
- if missing:
27
- print("Missing required packages. Please install:")
28
- for pkg in missing:
29
- print(f"pip install {pkg}")
30
- sys.exit(1)
31
-
32
-
33
- # Check dependencies before importing
34
- check_dependencies()
35
-
36
- from transformers import pipeline
37
- import docx
38
- import PyPDF2
39
- import io
40
-
41
-
42
- class DocumentTranslator:
43
- def __init__(self):
44
- try:
45
- # Initialize translation model
46
- self.translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ROMANCE")
47
-
48
- # Supported languages
49
- self.languages = {
50
- "English": "en",
51
- "French": "fr",
52
- "Spanish": "es",
53
- "Portuguese": "pt",
54
- "Italian": "it"
55
- }
56
- except Exception as e:
57
- print(f"Error initializing translator: {str(e)}")
58
- print("Please make sure all required packages are installed:")
59
- print("pip install transformers torch sentencepiece python-docx PyPDF2 gradio")
60
- raise
61
-
62
- def extract_text_from_docx(self, file):
63
- doc = docx.Document(file)
64
- text = []
65
- for paragraph in doc.paragraphs:
66
- text.append(paragraph.text)
67
- return "\n".join(text)
68
-
69
- def extract_text_from_pdf(self, file):
70
- pdf_reader = PyPDF2.PdfReader(file)
71
- text = []
72
- for page in pdf_reader.pages:
73
- text.append(page.extract_text())
74
- return "\n".join(text)
75
-
76
- def create_translated_docx(self, original_text, translated_text, output_filename):
77
- doc = docx.Document()
78
- paragraphs = translated_text.split("\n")
79
- for para in paragraphs:
80
- if para.strip():
81
- doc.add_paragraph(para)
82
-
83
- doc.save(output_filename)
84
- return output_filename
85
-
86
- def translate_document(self, file, source_lang, target_lang):
87
- try:
88
- # Create temporary directory for output
89
- temp_dir = tempfile.mkdtemp()
90
- output_filename = os.path.join(temp_dir, "translated_document.docx")
91
-
92
- # Extract text based on file type
93
- if file.name.endswith('.docx'):
94
- text = self.extract_text_from_docx(file)
95
- elif file.name.endswith('.pdf'):
96
- text = self.extract_text_from_pdf(file)
97
- else:
98
- return None, "Unsupported file format. Please use .docx or .pdf"
99
-
100
- # Split text into chunks to handle long documents
101
- chunk_size = 500
102
- chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
103
-
104
- # Translate chunks
105
- translated_chunks = []
106
- for chunk in chunks:
107
- translation = self.translator(chunk)[0]['translation_text']
108
- translated_chunks.append(translation)
109
-
110
- translated_text = " ".join(translated_chunks)
111
-
112
- # Create new document with translation
113
- output_file = self.create_translated_docx(text, translated_text, output_filename)
114
-
115
- return output_file, "Translation completed successfully!"
116
-
117
- except Exception as e:
118
- return None, f"Error during translation: {str(e)}"
119
-
120
-
121
- # Create Gradio interface
122
- def create_translation_interface():
123
- try:
124
- translator = DocumentTranslator()
125
-
126
- def translate_file(file, source_lang, target_lang):
127
- if file is None:
128
- return None, "Please upload a file"
129
- return translator.translate_document(file, source_lang, target_lang)
130
-
131
- iface = gr.Interface(
132
- fn=translate_file,
133
- inputs=[
134
- gr.File(label="Upload Document (.docx or .pdf)"),
135
- gr.Dropdown(choices=list(translator.languages.keys()), label="Source Language"),
136
- gr.Dropdown(choices=list(translator.languages.keys()), label="Target Language")
137
- ],
138
- outputs=[
139
- gr.File(label="Download Translated Document"),
140
- gr.Textbox(label="Status")
141
- ],
142
- title="Document Translation System",
143
- description="Upload a document (.docx or .pdf) and select source and target languages for translation.",
144
- theme="default"
145
- )
146
-
147
- return iface
148
- except Exception as e:
149
- print(f"Error creating interface: {str(e)}")
150
- sys.exit(1)
151
-
152
-
153
- if __name__ == "__main__":
154
- print("Initializing translation system...")
155
- print("Checking dependencies...")
156
- check_dependencies()
157
- print("Starting Gradio interface...")
158
- iface = create_translation_interface()
159
- iface.launch(share=True) # Added share=True to create a public link
 
1
+ import gradio as gr
2
+ import sys
3
+ import pkg_resources
4
+ import tempfile
5
+ import os
6
+ from pathlib import Path
7
+
8
def check_dependencies(required_packages=None, installed=None):
    """Exit with status 1 if a required distribution is not installed.

    Args:
        required_packages: Optional mapping of a pip-installable name to the
            distribution names that satisfy it. When omitted, the packages
            this application needs are used.
        installed: Optional iterable of installed distribution names. When
            omitted, the names are read from ``importlib.metadata``.

    Returns:
        None when every requirement is satisfied.

    Raises:
        SystemExit: With code 1, after printing one ``pip install`` line per
            missing package.
    """
    # Function-scope imports keep this block self-contained; importlib.metadata
    # is the stdlib replacement for the deprecated pkg_resources.working_set.
    import re
    from importlib import metadata

    def normalize(name):
        # PEP 503 normalisation: runs of "-", "_" and "." are equivalent and
        # case is ignored, so "python_docx" and "Python-Docx" compare equal.
        return re.sub(r"[-_.]+", "-", name).lower()

    if required_packages is None:
        required_packages = {
            'gradio': ['gradio'],
            'transformers': ['transformers'],
            'python-docx': ['python-docx', 'python_docx', 'docx'],
            'PyPDF2': ['PyPDF2', 'pypdf2', 'pypdf'],
            'torch': ['torch'],
            'sentencepiece': ['sentencepiece'],
            'tf-keras': ['tf-keras'],
        }

    if installed is None:
        installed = [dist.metadata["Name"] for dist in metadata.distributions()]

    # Skip entries without a usable name (e.g. damaged metadata).
    available = {normalize(name) for name in installed if name}

    missing = [
        package
        for package, variations in required_packages.items()
        if not any(normalize(variation) in available for variation in variations)
    ]

    if missing:
        print("Missing required packages. Please install:")
        for pkg in missing:
            print(f"pip install {pkg}")
        sys.exit(1)
31
+
32
+ # Check dependencies before importing
33
+ check_dependencies()
34
+
35
+ import torch
36
+ from transformers import pipeline
37
+ import docx
38
+ import PyPDF2
39
+ import io
40
+
41
class DocumentTranslator:
    """Translate the text of a .docx or .pdf file and save it as a new .docx.

    The translation pipeline is loaded with the
    ``Helsinki-NLP/opus-mt-en-ROMANCE`` checkpoint, whose model card describes
    it as English-to-Romance and requires a ``>>id<<`` target-language token
    at the start of every input.
    """

    def __init__(self):
        """Load the translation pipeline and the language-name-to-code table.

        Raises:
            Exception: Whatever ``pipeline`` raises is re-raised after an
                installation hint has been printed.
        """
        try:
            # Initialize translation model with PyTorch backend explicitly
            self.translator = pipeline(
                "translation",
                model="Helsinki-NLP/opus-mt-en-ROMANCE",
                framework="pt",
            )

            # Display name -> language code used in the ">>code<<" token.
            self.languages = {
                "English": "en",
                "French": "fr",
                "Spanish": "es",
                "Portuguese": "pt",
                "Italian": "it",
            }
        except Exception as e:
            print(f"Error initializing translator: {str(e)}")
            print("Please make sure all required packages are installed:")
            print("pip install transformers torch sentencepiece python-docx PyPDF2 gradio tf-keras")
            raise

    def extract_text_from_docx(self, file):
        """Return the paragraphs of a .docx file joined by newlines.

        Args:
            file: Path or file-like object passed straight to ``docx.Document``.
        """
        doc = docx.Document(file)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    def extract_text_from_pdf(self, file):
        """Return the text of every PDF page joined by newlines.

        Args:
            file: Path or file-like object passed straight to ``PyPDF2.PdfReader``.
        """
        pdf_reader = PyPDF2.PdfReader(file)
        # Guard against a page yielding None so that join() cannot raise.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    def create_translated_docx(self, original_text, translated_text, output_filename):
        """Write the translated text to ``output_filename``, one paragraph per line.

        Args:
            original_text: Unused; kept so existing callers keep working.
            translated_text: Text to write; blank lines are skipped.
            output_filename: Destination path of the .docx file.

        Returns:
            ``output_filename``.
        """
        doc = docx.Document()
        for para in translated_text.split("\n"):
            if para.strip():
                doc.add_paragraph(para)

        doc.save(output_filename)
        return output_filename

    @staticmethod
    def _chunk_text(text, chunk_size=500):
        """Split ``text`` into pieces of at most ``chunk_size`` characters.

        Pieces break on whitespace so words are never cut in half; only a
        single word longer than ``chunk_size`` is hard-split.
        """
        pieces = []
        current = []
        length = 0

        def flush():
            nonlocal current, length
            if current:
                pieces.append(" ".join(current))
            current = []
            length = 0

        for word in text.split():
            while len(word) > chunk_size:
                flush()
                pieces.append(word[:chunk_size])
                word = word[chunk_size:]
            # +1 accounts for the joining space when the piece is non-empty.
            if current and length + 1 + len(word) > chunk_size:
                flush()
            length += len(word) + (1 if current else 0)
            current.append(word)
        flush()
        return pieces

    def _translate_text(self, text, target_lang):
        """Translate ``text`` line by line, keeping the line structure.

        Args:
            text: Source text; lines are separated by ``"\\n"``.
            target_lang: A key of ``self.languages`` or a raw language code.

        Returns:
            The translated lines joined by ``"\\n"``; blank lines stay blank.
        """
        code = self.languages.get(target_lang, target_lang)
        # Target-language token required by the model (see class docstring).
        prefix = f">>{code}<< "

        translated_lines = []
        for line in text.split("\n"):
            if not line.strip():
                translated_lines.append("")
                continue
            parts = [
                self.translator(prefix + chunk)[0]['translation_text']
                for chunk in self._chunk_text(line)
            ]
            translated_lines.append(" ".join(parts))
        return "\n".join(translated_lines)

    def translate_document(self, file, source_lang, target_lang):
        """Translate an uploaded document and save the result as a .docx file.

        Args:
            file: Uploaded file: either an object with a ``name`` attribute
                holding its path, or the path itself.
            source_lang: Display name of the source language. Only
                ``"English"`` (or no selection) is accepted.
            target_lang: Display name of the target language; must be a
                non-English key of ``self.languages``.

        Returns:
            ``(output_path, status_message)`` on success, or
            ``(None, error_message)`` on any failure.
        """
        try:
            if source_lang not in (None, "", "English"):
                return None, "Unsupported source language. This model translates from English only."

            target_code = self.languages.get(target_lang)
            if target_code is None or target_code == "en":
                choices = ", ".join(
                    name for name, code in self.languages.items() if code != "en"
                )
                return None, f"Unsupported target language. Please choose one of: {choices}"

            # Work from the path so both upload representations are handled.
            path = getattr(file, "name", file)
            suffix = os.path.splitext(str(path))[1].lower()

            # Extract text based on file type (case-insensitive extension).
            if suffix == '.docx':
                text = self.extract_text_from_docx(path)
            elif suffix == '.pdf':
                text = self.extract_text_from_pdf(path)
            else:
                return None, "Unsupported file format. Please use .docx or .pdf"

            translated_text = self._translate_text(text, target_lang)

            # Create the output directory only once there is something to save.
            temp_dir = tempfile.mkdtemp()
            output_filename = os.path.join(temp_dir, "translated_document.docx")
            output_file = self.create_translated_docx(text, translated_text, output_filename)

            return output_file, "Translation completed successfully!"

        except Exception as e:
            return None, f"Error during translation: {str(e)}"
122
+
123
def create_translation_interface():
    """Build the Gradio interface around a fresh ``DocumentTranslator``.

    Returns:
        The configured ``gr.Interface``.

    Any exception raised while building it is printed and the process exits
    with status 1.
    """
    try:
        translator = DocumentTranslator()

        def translate_file(file, source_lang, target_lang):
            # Delegate to the translator only when a file was uploaded.
            if file is not None:
                return translator.translate_document(file, source_lang, target_lang)
            return None, "Please upload a file"

        def language_dropdown(label):
            # Each dropdown gets its own list of language display names.
            return gr.Dropdown(choices=list(translator.languages.keys()), label=label)

        input_components = [
            gr.File(label="Upload Document (.docx or .pdf)"),
            language_dropdown("Source Language"),
            language_dropdown("Target Language"),
        ]
        output_components = [
            gr.File(label="Download Translated Document"),
            gr.Textbox(label="Status"),
        ]

        return gr.Interface(
            fn=translate_file,
            inputs=input_components,
            outputs=output_components,
            title="Document Translation System",
            description="Upload a document (.docx or .pdf) and select source and target languages for translation.",
            theme="default",
        )
    except Exception as e:
        print(f"Error creating interface: {str(e)}")
        sys.exit(1)
152
+
153
def main():
    """Print the startup messages, re-check dependencies and launch the app."""
    print("Initializing translation system...")
    print("Checking dependencies...")
    check_dependencies()
    print("Starting Gradio interface...")
    # share=True asks Gradio for a public link in addition to the local one.
    create_translation_interface().launch(share=True)


if __name__ == "__main__":
    main()