File size: 2,143 Bytes
f038f7c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
import torch
from nltk.tokenize import sent_tokenize, LineTokenizer
import math
import nltk

# Fetch the NLTK 'punkt_tab' resource when the module is imported.
# NOTE(review): presumably this is the data sent_tokenize relies on — confirm.
nltk.download('punkt_tab')

# Load the Marian tokenizer and model once at import time; translate_id_en
# reads these module-level objects on every call.
# NOTE(review): "opus-mt-id-en" has no organisation prefix, so it presumably
# resolves to a local directory next to this script; the Hub id is likely
# "Helsinki-NLP/opus-mt-id-en" — confirm before relying on a remote download.
model_name = "opus-mt-id-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# Define the translation function with adaptive input handling
def translate_id_en(text, *, batch_size=8):
    """Translate Indonesian text to English, keeping the line structure.

    The input is split into lines with NLTK's ``LineTokenizer``; each line is
    split into sentences with ``sent_tokenize``, and the sentences are
    translated in batches using the module-level ``tokenizer`` and ``model``.

    Args:
        text: Source text. ``None``, empty, or whitespace-only input yields
            an empty string without invoking the model.
        batch_size: Maximum number of sentences passed to a single
            ``model.generate`` call. Keyword-only; defaults to 8.

    Returns:
        The translated text. Sentences that came from the same input line
        are joined with a single space; lines are joined with a blank line.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Guard before tokenizing: the line tokenizer cannot handle None, and
    # blank input has nothing to translate.
    if text is None or not text.strip():
        return ""

    translated_paragraphs = []
    for paragraph in LineTokenizer().tokenize(text):
        sentences = sent_tokenize(paragraph)
        translated = []

        # Walk the sentences in fixed-size slices so a long paragraph never
        # becomes one oversized generate() call.
        for start in range(0, len(sentences), batch_size):
            sent_batch = sentences[start:start + batch_size]
            # padding=True aligns sentence lengths within the batch;
            # truncation=True clips any sentence beyond the model's limit.
            model_inputs = tokenizer(
                sent_batch, return_tensors="pt", padding=True, truncation=True
            )

            # Inference only: skip gradient tracking.
            with torch.no_grad():
                generated = model.generate(**model_inputs)

            translated.extend(
                tokenizer.decode(tokens, skip_special_tokens=True)
                for tokens in generated
            )

        translated_paragraphs.append(" ".join(translated))

    # Separate translated lines with a blank line in the output.
    return "\n\n".join(translated_paragraphs)

# Build the Gradio UI: a single text box in, a single text box out, with
# translate_id_en as the handler between them.
iface = gr.Interface(
    fn=translate_id_en,  # Handler invoked with the input box's text
    inputs=gr.Textbox(lines=12, placeholder="Enter Indonesian text...", label="Input (Indonesian)"),  # 12-line input box with placeholder hint
    outputs=gr.Textbox(lines=12, label="Output (English)"),  # 12-line output box for the translation
    title="Indonesian to English Translator",  # Title shown for the app
    description="Translate Indonesian text to English using the opus-mt-id-en model."
)

# Start the Gradio app with default launch settings (runs at import time).
iface.launch()