File size: 3,971 Bytes
6b9283a
 
 
b31c836
 
 
94e2407
 
b31c836
 
6b9283a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b31c836
6b9283a
b5d5b5e
 
 
6b9283a
b5d5b5e
6b9283a
b5d5b5e
b31c836
 
 
 
 
 
944d4d5
b31c836
 
 
 
 
 
 
 
b5d5b5e
b31c836
 
b5d5b5e
6b9283a
 
 
b5d5b5e
 
 
 
93f6210
 
 
 
 
 
 
 
 
 
 
 
 
 
b5d5b5e
6b9283a
b5d5b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b9283a
b5d5b5e
 
 
 
 
6b9283a
 
b5d5b5e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
import torch
import nltk

# Fetch the NLTK Punkt resources used for sentence tokenization
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)

from nltk.tokenize import sent_tokenize

# Maps a model name to its (model, tokenizer) pair so each one is loaded once
models_cache = {}


def load_model(model_name):
    """
    Return the cached ``(model, tokenizer)`` pair for ``model_name``.

    On a cache miss the MarianMT tokenizer and model are loaded with
    ``from_pretrained``, the model is moved to CUDA when it is available,
    and the pair is stored in ``models_cache`` before being returned.
    """
    # Fast path: this model has already been loaded
    try:
        return models_cache[model_name]
    except KeyError:
        pass

    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    if torch.cuda.is_available():
        model = model.to('cuda')

    entry = (model, tokenizer)
    models_cache[model_name] = entry
    return entry

def translate_text(model_name, text):
    """
    Translate input text sentence by sentence using the specified model.

    Args:
        model_name: Model identifier passed to ``load_model``.
        text: Source text. It is split into sentences with ``sent_tokenize``
            and every sentence is translated on its own.

    Returns:
        The translated sentences joined by single spaces. If the model name
        or the text is missing (or the text is only whitespace) a prompt
        message is returned instead. If anything raises while loading the
        model or translating, an ``"Error: ..."`` string is returned.
    """
    # Reject blank input up front so whitespace-only text does not trigger a
    # model load just to produce an empty result.
    if not model_name or not text or not text.strip():
        return "Please select a model and provide text for translation."

    try:
        # Load the model and tokenizer
        model, tokenizer = load_model(model_name)
        # Put the inputs on whatever device the model itself lives on,
        # instead of assuming CUDA availability implies the model is on CUDA.
        device = model.device

        translated_sentences = []
        for sentence in sent_tokenize(text):
            print(f"Sentence: {sentence}\n")
            # truncation=True keeps an over-long sentence within the
            # tokenizer's maximum length instead of failing in generate().
            tokens = tokenizer(
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            tokens = {k: v.to(device) for k, v in tokens.items()}

            # Generate and decode the translation for this sentence
            translated = model.generate(**tokens)
            translated_sentences.append(
                tokenizer.decode(translated[0], skip_special_tokens=True)
            )

        # Join translated sentences back into a single string
        return " ".join(translated_sentences)

    except Exception as e:
        # UI boundary: surface the failure as text rather than raising
        return f"Error: {str(e)}"

# Model options: (description, Hugging Face model id) pairs.
# Every model id shares the "Helsinki-NLP/" prefix, so only the checkpoint
# name is spelled out per entry.
model_options = [
    (description, "Helsinki-NLP/" + checkpoint)
    for description, checkpoint in (
        ("English to Turkish", "opus-mt-tc-big-en-tr"),
        ("Turkish to English", "opus-mt-tc-big-tr-en"),
        ("English to French", "opus-mt-tc-big-en-fr"),
        ("French to English", "opus-mt-tc-big-fr-en"),
        ("English to German", "opus-mt-en-de"),
        ("German to English", "opus-mt-de-en"),
        ("English to Spanish", "opus-mt-tc-big-en-es"),
        ("Spanish to English", "opus-mt-es-en"),
        ("English to Arabic", "opus-mt-tc-big-en-ar"),
        ("Arabic to English", "opus-mt-tc-big-ar-en"),
        ("English to Urdu", "opus-mt-en-ur"),
        ("Urdu to English", "opus-mt-ur-en"),
        ("English to Hindi", "opus-mt-en-hi"),
        ("Hindi to English", "opus-mt-hi-en"),
        ("English to Chinese", "opus-mt-en-zh"),
        ("Chinese to English", "opus-mt-zh-en"),
    )
]

# Create Gradio interface: model picker, input box, Translate/Clear buttons
# and a read-only output box.
with gr.Blocks() as demo:
    gr.Markdown("# 🌍 Real-Time Sentence Translation")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Select Translation Model",
            # Pass the (name, value) pairs directly so the dropdown shows the
            # readable name (e.g. "English to Turkish") while the handler
            # still receives the model id as the selected value.
            # NOTE(review): tuple choices need Gradio 4+ — confirm the
            # installed version.
            choices=model_options,
            type="value",
        )

    with gr.Row():
        input_text = gr.Textbox(
            label="Enter text (complete sentences)",
            lines=5,
            placeholder="Type here...",
        )

    with gr.Row():
        translate_button = gr.Button("Translate")
        clear_button = gr.Button("Clear")

    output_text = gr.Textbox(label="Translated Text", interactive=False)

    def clear_inputs():
        """Return empty strings for the input and output text boxes."""
        return "", ""

    # Translate: (selected model id, input text) -> output box
    translate_button.click(
        fn=translate_text,
        inputs=[model_dropdown, input_text],
        outputs=output_text,
    )

    # Clear: blank out both the input and the output box
    clear_button.click(
        fn=clear_inputs,
        inputs=[],
        outputs=[input_text, output_text],
    )

# Run the Gradio app
demo.launch()