finnstrom3693 committed on
Commit
f038f7c
1 Parent(s): 2312f78

Upload translation-id-en-marian-gradio.py

Browse files
Files changed (1) hide show
  1. translation-id-en-marian-gradio.py +56 -0
translation-id-en-marian-gradio.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import MarianMTModel, MarianTokenizer
3
+ import torch
4
+ from nltk.tokenize import sent_tokenize, LineTokenizer
5
+ import math
6
+ import nltk
7
+
8
# Fetch the "punkt_tab" NLTK resource at start-up (presumably the data
# sent_tokenize relies on — confirm against the installed NLTK version).
nltk.download("punkt_tab")

# Identifier passed verbatim to from_pretrained for both the tokenizer and
# the model.
# NOTE(review): this is a bare name without an organisation prefix —
# presumably a local directory; verify it resolves in the deployment
# environment.
model_name = "opus-mt-id-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
14
+
15
+ # Define the translation function with adaptive input handling
16
def translate_id_en(text):
    """Translate Indonesian text to English, one input line at a time.

    The text is split into lines with NLTK's ``LineTokenizer`` (default
    settings), each line is split into sentences with ``sent_tokenize``,
    and the sentences are translated in fixed-size batches by the
    module-level ``tokenizer`` and ``model``.

    Args:
        text: The text to translate. ``None``, empty, and whitespace-only
            input produce an empty string without invoking the model.

    Returns:
        The translated text as a single string. Translated sentences that
        came from the same input line are joined with one space; the
        translated lines are joined with a blank line between them.
    """
    # A caller (e.g. a UI text field) may pass None or blank text; there is
    # nothing to translate, so skip tokenization and generation entirely.
    if text is None or not text.strip():
        return ""

    batch_size = 8
    translated_paragraphs = []

    for paragraph in LineTokenizer().tokenize(text):
        sentences = sent_tokenize(paragraph)
        translated = []

        # Walk the sentences in slices of at most batch_size items.
        for start in range(0, len(sentences), batch_size):
            sent_batch = sentences[start:start + batch_size]

            # Keep the input tensors on the same device as the model so the
            # function keeps working if the model is moved off the CPU.
            model_inputs = tokenizer(
                sent_batch,
                return_tensors="pt",
                padding=True,
                truncation=True,
            ).to(model.device)

            # Inference only: no gradients are needed for generation.
            with torch.no_grad():
                generated = model.generate(**model_inputs)

            # Decode the whole batch of generated token ids back into text.
            translated.extend(
                tokenizer.batch_decode(generated, skip_special_tokens=True)
            )

        translated_paragraphs.append(" ".join(translated))

    # Combine all translated lines into the final text.
    return "\n\n".join(translated_paragraphs)
45
+
46
+ # Define the Gradio interface
47
# Multi-line text widgets: one for the Indonesian source, one for the
# English result.
_input_box = gr.Textbox(
    lines=12,
    placeholder="Enter Indonesian text...",
    label="Input (Indonesian)",
)
_output_box = gr.Textbox(lines=12, label="Output (English)")

# Wire the translation function to the two text boxes.
iface = gr.Interface(
    fn=translate_id_en,
    inputs=_input_box,
    outputs=_output_box,
    title="Indonesian to English Translator",
    description="Translate Indonesian text to English using the opus-mt-id-en model.",
)

# Start serving the interface with Gradio's default launch settings.
iface.launch()