Spaces:
Sleeping
Sleeping
File size: 3,971 Bytes
6b9283a b31c836 94e2407 b31c836 6b9283a b31c836 6b9283a b5d5b5e 6b9283a b5d5b5e 6b9283a b5d5b5e b31c836 944d4d5 b31c836 b5d5b5e b31c836 b5d5b5e 6b9283a b5d5b5e 93f6210 b5d5b5e 6b9283a b5d5b5e 6b9283a b5d5b5e 6b9283a b5d5b5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
import torch
import nltk
# Fetch the NLTK sentence-tokenizer data at import time so sent_tokenize
# can run; both resource names are requested, in this order.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)
from nltk.tokenize import sent_tokenize

# Process-wide cache: model name -> (model, tokenizer) pair
models_cache = {}
def load_model(model_name):
    """
    Return the ``(model, tokenizer)`` pair for ``model_name``.

    The pair is created on first request and stored in the module-level
    ``models_cache`` dict; later requests for the same name return the
    stored pair. The model is moved to CUDA when it is available.
    """
    # Fast path: this checkpoint has already been loaded.
    if model_name in models_cache:
        return models_cache[model_name]

    marian_tokenizer = MarianTokenizer.from_pretrained(model_name)
    marian_model = MarianMTModel.from_pretrained(model_name)
    if torch.cuda.is_available():
        marian_model = marian_model.to('cuda')

    entry = (marian_model, marian_tokenizer)
    models_cache[model_name] = entry
    return entry
def translate_text(model_name, text):
    """
    Translate input text sentence by sentence using the specified model.

    Args:
        model_name: Model identifier handed to ``load_model``.
        text: Text to translate. It is split with ``sent_tokenize`` and
            every sentence is translated on its own.

    Returns:
        The translated sentences joined by single spaces. When the model
        name or the text is missing (including text that is only
        whitespace) a prompt asking for both is returned instead. Any
        exception raised while loading the model or translating is
        returned as an ``"Error: ..."`` string rather than propagated.
    """
    # Whitespace-only text would tokenize to zero sentences and silently
    # yield an empty result, so treat it like missing input.
    text_is_blank = isinstance(text, str) and not text.strip()
    if not model_name or not text or text_is_blank:
        return "Please select a model and provide text for translation."

    try:
        # Load the model and tokenizer
        model, tokenizer = load_model(model_name)

        translated_sentences = []
        for sentence in sent_tokenize(text):
            print(f"Sentence: {sentence}\n")
            # truncation=True keeps an overlong sentence within the
            # tokenizer's configured maximum length instead of handing the
            # model more positions than it supports.
            tokens = tokenizer(
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Follow the device the model actually lives on rather than
            # re-deriving it from a global CUDA check.
            tokens = tokens.to(model.device)

            # Generate translation for the sentence
            translated = model.generate(**tokens)
            translated_text = tokenizer.decode(
                translated[0], skip_special_tokens=True
            )
            translated_sentences.append(translated_text)

        # Join translated sentences back into a single string
        return " ".join(translated_sentences)
    except Exception as e:
        # UI boundary: report the failure as text so the output box shows it.
        return f"Error: {str(e)}"
# Model options: declared as a label -> model id mapping (insertion order is
# preserved), then flattened into the list of (label, model id) pairs.
model_options = list({
    "English to Turkish": "Helsinki-NLP/opus-mt-tc-big-en-tr",
    "Turkish to English": "Helsinki-NLP/opus-mt-tc-big-tr-en",
    "English to French": "Helsinki-NLP/opus-mt-tc-big-en-fr",
    "French to English": "Helsinki-NLP/opus-mt-tc-big-fr-en",
    "English to German": "Helsinki-NLP/opus-mt-en-de",
    "German to English": "Helsinki-NLP/opus-mt-de-en",
    "English to Spanish": "Helsinki-NLP/opus-mt-tc-big-en-es",
    "Spanish to English": "Helsinki-NLP/opus-mt-es-en",
    "English to Arabic": "Helsinki-NLP/opus-mt-tc-big-en-ar",
    "Arabic to English": "Helsinki-NLP/opus-mt-tc-big-ar-en",
    "English to Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Urdu to English": "Helsinki-NLP/opus-mt-ur-en",
    "English to Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Hindi to English": "Helsinki-NLP/opus-mt-hi-en",
    "English to Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Chinese to English": "Helsinki-NLP/opus-mt-zh-en",
}.items())
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🌍 Real-Time Sentence Translation")

    with gr.Row():
        # Pass the (label, model id) pairs directly so the dropdown shows
        # the readable label while the callback still receives the model id.
        # NOTE(review): tuple choices need a Gradio version that supports
        # (name, value) options - confirm against the pinned version.
        model_dropdown = gr.Dropdown(
            label="Select Translation Model",
            choices=model_options,
            type="value",
        )

    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text (complete sentences)",
            lines=5,
            placeholder="Type here...",
        )

    with gr.Row():
        translate_button = gr.Button("Translate")
        clear_button = gr.Button("Clear")

    output_text = gr.Textbox(label="Translated Text", interactive=False)

    def clear_inputs():
        """Return empty strings for the input and output textboxes."""
        return "", ""

    # Translate: (selected model id, entered text) -> output textbox
    translate_button.click(
        fn=translate_text,
        inputs=[model_dropdown, input_text],
        outputs=output_text,
    )

    # Clear: blank both the input and the output textbox
    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[input_text, output_text],
    )

# Run the Gradio app
demo.launch()
|