Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import tempfile
|
4 |
-
from transformers import pipeline
|
5 |
from deep_translator import GoogleTranslator
|
6 |
from langdetect import detect
|
7 |
from fpdf import FPDF
|
@@ -30,6 +30,9 @@ def detect_language(text):
|
|
30 |
return "en" # Default to English if detection fails
|
31 |
|
32 |
def translate_text(text, target_language):
|
|
|
|
|
|
|
33 |
source_lang = detect_language(text)
|
34 |
|
35 |
# Skip translation if source and target are the same
|
@@ -42,20 +45,16 @@ def translate_text(text, target_language):
|
|
42 |
translation = translator.translate(text)
|
43 |
return translation
|
44 |
except Exception as e:
|
45 |
-
|
46 |
-
try:
|
47 |
-
model_name = f"Helsinki-NLP/opus-mt-{source_lang}-{LANGUAGES[target_language]}"
|
48 |
-
translator = pipeline("translation", model=model_name)
|
49 |
-
translation = translator(text, max_length=500)
|
50 |
-
return translation[0]['translation_text']
|
51 |
-
except:
|
52 |
-
return f"Translation failed: {str(e)}"
|
53 |
|
54 |
def extract_text_from_document(file_path):
|
|
|
|
|
|
|
55 |
_, file_extension = os.path.splitext(file_path)
|
56 |
|
57 |
if file_extension.lower() == '.txt':
|
58 |
-
with open(file_path, 'r', encoding='utf-8') as file:
|
59 |
return file.read()
|
60 |
elif file_extension.lower() in ['.docx', '.doc']:
|
61 |
return docx2txt.process(file_path)
|
@@ -71,7 +70,10 @@ def text_to_pdf(text, output_path):
|
|
71 |
lines = text.split('\n')
|
72 |
for line in lines:
|
73 |
# Handle non-ASCII characters
|
74 |
-
|
|
|
|
|
|
|
75 |
|
76 |
pdf.output(output_path)
|
77 |
return output_path
|
@@ -80,8 +82,8 @@ def translate_and_save(input_text, input_file, target_language):
|
|
80 |
# Determine input source (text or file)
|
81 |
if input_text:
|
82 |
text_to_translate = input_text
|
83 |
-
elif input_file:
|
84 |
-
text_to_translate = extract_text_from_document(input_file
|
85 |
else:
|
86 |
return None, "Please provide either text or a document for translation."
|
87 |
|
@@ -96,27 +98,35 @@ def translate_and_save(input_text, input_file, target_language):
|
|
96 |
|
97 |
return pdf_path, translated_text
|
98 |
|
99 |
-
# Create Gradio interface
|
100 |
with gr.Blocks(title="Context-Aware Translation Tool") as demo:
|
101 |
gr.Markdown("# Context-Aware Language Translation")
|
102 |
gr.Markdown("This tool translates text while preserving context, idioms, and phrases.")
|
103 |
|
104 |
with gr.Row():
|
105 |
with gr.Column():
|
|
|
106 |
input_text = gr.Textbox(label="Enter text to translate", lines=5)
|
107 |
-
input_file = gr.File(label="Or upload a document (.txt, .docx)")
|
108 |
-
target_language = gr.Dropdown(
|
|
|
|
|
|
|
|
|
109 |
translate_button = gr.Button("Translate")
|
110 |
|
111 |
with gr.Column():
|
|
|
112 |
output_text = gr.Textbox(label="Translation", lines=5)
|
113 |
output_pdf = gr.File(label="Download as PDF")
|
114 |
|
|
|
115 |
translate_button.click(
|
116 |
fn=translate_and_save,
|
117 |
inputs=[input_text, input_file, target_language],
|
118 |
outputs=[output_pdf, output_text]
|
119 |
)
|
120 |
|
|
|
121 |
if __name__ == "__main__":
|
122 |
-
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
import tempfile
|
4 |
+
from transformers import pipeline
|
5 |
from deep_translator import GoogleTranslator
|
6 |
from langdetect import detect
|
7 |
from fpdf import FPDF
|
|
|
30 |
return "en" # Default to English if detection fails
|
31 |
|
32 |
def translate_text(text, target_language):
|
33 |
+
if not text or not target_language:
|
34 |
+
return "No text to translate"
|
35 |
+
|
36 |
source_lang = detect_language(text)
|
37 |
|
38 |
# Skip translation if source and target are the same
|
|
|
45 |
translation = translator.translate(text)
|
46 |
return translation
|
47 |
except Exception as e:
|
48 |
+
return f"Translation failed: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
def extract_text_from_document(file_path):
|
51 |
+
if not file_path:
|
52 |
+
return ""
|
53 |
+
|
54 |
_, file_extension = os.path.splitext(file_path)
|
55 |
|
56 |
if file_extension.lower() == '.txt':
|
57 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
|
58 |
return file.read()
|
59 |
elif file_extension.lower() in ['.docx', '.doc']:
|
60 |
return docx2txt.process(file_path)
|
|
|
70 |
lines = text.split('\n')
|
71 |
for line in lines:
|
72 |
# Handle non-ASCII characters
|
73 |
+
try:
|
74 |
+
pdf.multi_cell(0, 10, line.encode('latin-1', 'replace').decode('latin-1'))
|
75 |
+
except Exception:
|
76 |
+
pdf.multi_cell(0, 10, "[Text contains unsupported characters]")
|
77 |
|
78 |
pdf.output(output_path)
|
79 |
return output_path
|
|
|
82 |
# Determine input source (text or file)
|
83 |
if input_text:
|
84 |
text_to_translate = input_text
|
85 |
+
elif input_file is not None:
|
86 |
+
text_to_translate = extract_text_from_document(input_file)
|
87 |
else:
|
88 |
return None, "Please provide either text or a document for translation."
|
89 |
|
|
|
98 |
|
99 |
return pdf_path, translated_text
|
100 |
|
101 |
+
# Create Gradio interface with proper component configurations
|
102 |
with gr.Blocks(title="Context-Aware Translation Tool") as demo:
|
103 |
gr.Markdown("# Context-Aware Language Translation")
|
104 |
gr.Markdown("This tool translates text while preserving context, idioms, and phrases.")
|
105 |
|
106 |
with gr.Row():
|
107 |
with gr.Column():
|
108 |
+
# Input components; the file upload is passed to the handler as a path (type="filepath")
|
109 |
input_text = gr.Textbox(label="Enter text to translate", lines=5)
|
110 |
+
input_file = gr.File(label="Or upload a document (.txt, .docx)", type="filepath")
|
111 |
+
target_language = gr.Dropdown(
|
112 |
+
label="Target Language",
|
113 |
+
choices=list(LANGUAGES.keys()),
|
114 |
+
value="English"
|
115 |
+
)
|
116 |
translate_button = gr.Button("Translate")
|
117 |
|
118 |
with gr.Column():
|
119 |
+
# Output components: the translated text and the downloadable PDF
|
120 |
output_text = gr.Textbox(label="Translation", lines=5)
|
121 |
output_pdf = gr.File(label="Download as PDF")
|
122 |
|
123 |
+
# Run translate_and_save on click; its return values fill (output_pdf, output_text) in that order
|
124 |
translate_button.click(
|
125 |
fn=translate_and_save,
|
126 |
inputs=[input_text, input_file, target_language],
|
127 |
outputs=[output_pdf, output_text]
|
128 |
)
|
129 |
|
130 |
+
# Launch with show_error=True so runtime errors are reported in the UI instead of failing silently
|
131 |
if __name__ == "__main__":
|
132 |
+
demo.launch(show_error=True)
|