# Spaces: Running
import os
import tempfile
import zipfile

import docx2txt
import gradio as gr
from deep_translator import GoogleTranslator
from fpdf import FPDF
from langdetect import detect
# Display name -> language code passed to GoogleTranslator as the target.
# Chinese uses "zh-CN" (Simplified): the bare "zh" code is not one of the
# codes GoogleTranslator accepts, so every translation into Chinese failed.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Russian": "ru",
    "Chinese": "zh-CN",
    "Japanese": "ja",
    "Korean": "ko",
    "Arabic": "ar",
    "Hindi": "hi",
}
def detect_language(text):
    """Guess the language of *text* and return its language code.

    Args:
        text: The text whose language should be detected.

    Returns:
        The code reported by ``langdetect.detect``, or ``"en"`` when the
        text is empty/blank, is not a string, or detection fails.
    """
    # Nothing to analyse: fall back to the default without calling langdetect.
    if not isinstance(text, str) or not text.strip():
        return "en"
    try:
        return detect(text)
    except Exception:
        # Best-effort: any detection failure defaults to English.  Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        return "en"
# GoogleTranslator refuses inputs of 5000 characters or more, so longer texts
# are translated in pieces that stay comfortably below that limit.
_MAX_CHUNK_CHARS = 4500


def _split_into_chunks(text, limit):
    """Split *text* into pieces of at most *limit* chars, preferring line breaks."""
    chunks = []
    current = ""
    for line in text.splitlines(keepends=True):
        # A single line longer than the limit has to be cut mid-line.
        while len(line) > limit:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(line[:limit])
            line = line[limit:]
        if current and len(current) + len(line) > limit:
            chunks.append(current)
            current = ""
        current += line
    if current:
        chunks.append(current)
    return chunks


def _translate_chunk(translator, chunk):
    """Translate one chunk, keeping its leading/trailing whitespace intact."""
    core = chunk.strip()
    if not core:
        # Blank chunks (e.g. runs of empty lines) need no translation.
        return chunk
    leading = chunk[: len(chunk) - len(chunk.lstrip())]
    trailing = chunk[len(chunk.rstrip()):]
    return leading + translator.translate(core) + trailing


def translate_text(text, target_language):
    """Translate *text* into the language named by *target_language*.

    Args:
        text: The text to translate.
        target_language: A display name that is a key of ``LANGUAGES``
            (for example ``"French"``).

    Returns:
        The translated text; the original text when it already appears to be
        in the target language; ``"No text to translate"`` when either
        argument is empty or the text is blank; or a message starting with
        ``"Translation failed:"`` when the target language is unknown or the
        translation raises.
    """
    if not text or not text.strip() or not target_language:
        return "No text to translate"

    target_code = LANGUAGES.get(target_language)
    if target_code is None:
        # Previously an unknown name escaped as an uncaught KeyError.
        return f"Translation failed: unsupported target language {target_language!r}"

    source_lang = detect_language(text)
    # The detector may report codes in a different letter case than the
    # table (e.g. "zh-cn"), so compare case-insensitively.
    if source_lang.lower() == target_code.lower():
        return text

    try:
        try:
            translator = GoogleTranslator(source=source_lang, target=target_code)
        except Exception:
            # The detected code is not necessarily one GoogleTranslator
            # accepts; let the service detect the source language instead.
            # A bad target fails again here and is reported below.
            translator = GoogleTranslator(source="auto", target=target_code)

        if len(text) <= _MAX_CHUNK_CHARS:
            return translator.translate(text)

        pieces = [
            _translate_chunk(translator, chunk)
            for chunk in _split_into_chunks(text, _MAX_CHUNK_CHARS)
        ]
        return "".join(pieces)
    except Exception as e:
        return f"Translation failed: {str(e)}"
def extract_text_from_document(file):
    """Return the plain text contained in an uploaded document.

    Args:
        file: Either an object exposing the document's path as ``.name``
            (as the Gradio upload does here) or the path itself as a string.

    Returns:
        The document text; ``""`` when *file* is empty/None; or the
        "Unsupported file format..." message when the extension is not
        handled or the Word document cannot be read.
    """
    if not file:
        return ""

    # Accept both upload objects (with a ``name`` attribute) and plain paths.
    file_path = getattr(file, "name", file)
    extension = os.path.splitext(file_path)[1].lower()
    unsupported = "Unsupported file format. Please upload a .txt or .docx file."

    if extension == ".txt":
        # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading BOM
        # so it does not end up in the text sent for translation.
        with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
            return f.read()

    if extension in (".docx", ".doc"):
        try:
            return docx2txt.process(file_path)
        except (zipfile.BadZipFile, KeyError):
            # docx2txt reads .docx zip archives; a legacy binary .doc or a
            # corrupt upload is reported as unsupported instead of crashing.
            return unsupported

    return unsupported
def text_to_pdf(text, output_path):
    """Write *text* to a single-font PDF file, one paragraph per input line.

    Args:
        text: The text to render; ``None`` is treated as empty text.
        output_path: Where the PDF file is written.

    Returns:
        ``output_path``, unchanged.

    Note:
        Each line is forced through Latin-1 with ``errors="replace"``, so
        every character outside Latin-1 (Cyrillic, CJK, Arabic, Devanagari,
        ...) is rendered as ``?``.  Rendering those scripts would require
        registering a Unicode font, which this function does not do.
    """
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for line in (text or "").split("\n"):
        printable = line.encode("latin-1", "replace").decode("latin-1")
        # Start every paragraph at the left margin.  fpdf2's multi_cell
        # leaves the cursor at the right edge by default, which would leave
        # no horizontal room for the next full-width cell.
        pdf.set_x(pdf.l_margin)
        try:
            pdf.multi_cell(0, 10, printable)
        except Exception:
            # Keep producing the document even if one line cannot be laid out.
            pdf.set_x(pdf.l_margin)
            pdf.multi_cell(0, 10, "[Text contains unsupported characters]")

    pdf.output(output_path)
    return output_path
def translate_and_save(input_text, input_file, target_language):
    """Translate typed text or an uploaded document and export it as a PDF.

    Typed text takes precedence; the uploaded file is used only when the
    text box is empty or blank.

    Args:
        input_text: Text typed by the user (may be empty or None).
        input_file: Uploaded document, or None.
        target_language: Display name of the target language.

    Returns:
        A ``(pdf_path, message)`` tuple.  On success ``pdf_path`` is the
        generated PDF and ``message`` is the translated text.  When there is
        nothing to translate, or extraction/translation reports an error,
        ``pdf_path`` is ``None`` and ``message`` explains why.
    """
    if input_text and input_text.strip():
        text_to_translate = input_text
    elif input_file is not None:
        text_to_translate = extract_text_from_document(input_file)
        # The extractor reports problems as plain strings; surface them
        # instead of translating the error message itself.
        if text_to_translate.startswith("Unsupported file format"):
            return None, text_to_translate
        if not text_to_translate.strip():
            return None, "The uploaded document contains no text to translate."
    else:
        return None, "Please provide either text or a document for translation."

    translated_text = translate_text(text_to_translate, target_language)
    # translate_text also signals failure through its return string; do not
    # produce a PDF whose only content is an error message.
    if translated_text.startswith(("Translation failed:", "No text to translate")):
        return None, translated_text

    # Reserve a unique path, then let the PDF writer fill it once the handle
    # is closed (delete=False keeps the file around for the download).
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as temp_pdf:
        pdf_path = temp_pdf.name
    text_to_pdf(translated_text, pdf_path)
    return pdf_path, translated_text
# Build the Gradio UI: inputs in the left column, results in the right one.
with gr.Blocks(title="Context-Aware Translation Tool") as demo:
    gr.Markdown("# Context-Aware Language Translation")
    gr.Markdown("This tool translates text while preserving context, idioms, and phrases.")

    with gr.Row():
        # Left column: what to translate and into which language.
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter text to translate",
                lines=5,
                placeholder="Type your text here",
            )
            input_file = gr.File(
                label="Or upload a document (.txt, .docx)",
                file_types=[".txt", ".docx"],
            )
            target_language = gr.Dropdown(
                label="Target Language",
                choices=list(LANGUAGES),
                value="English",
            )
            translate_button = gr.Button("Translate")

        # Right column: translated text plus the downloadable PDF.
        with gr.Column():
            output_text = gr.Textbox(label="Translation", lines=5, interactive=False)
            output_pdf = gr.File(label="Download as PDF")

    # Plain Python callback (no _js hook); the output order matches the
    # (pdf_path, text) tuple returned by translate_and_save.
    translate_button.click(
        fn=translate_and_save,
        inputs=[input_text, input_file, target_language],
        outputs=[output_pdf, output_text],
    )


# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    print(f"Gradio version: {gr.__version__}")
    demo.launch(debug=True)