import gradio as gr
import torch
from transformers import AutoTokenizer

# Load the base tokenizer and the fine-tuned English-to-Arabic Marian model.
# The checkpoint was saved as a whole pickled model, so torch.load restores it
# directly (on PyTorch >= 2.6 this may additionally need weights_only=False);
# map_location keeps inference on the CPU.
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
model = torch.load("helsinki_fineTuned.pt", map_location=torch.device("cpu"))
model.eval()


def translate_gradio(input_text):
    # Tokenize the input; calling the tokenizer directly replaces the
    # deprecated prepare_seq2seq_batch API.
    tokenized_text = tokenizer([input_text], return_tensors="pt")
    # Generate the translation without tracking gradients, then decode it.
    with torch.no_grad():
        generated_ids = model.generate(**tokenized_text)
    text_ar = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text_ar

translate_interface = gr.Interface(
    fn=translate_gradio,
    # String flagging modes and gr.Textbox replace the old boolean flag and
    # the removed gr.inputs namespace in current Gradio releases.
    allow_flagging="manual",
    flagging_dir="Flags",
    title='Translating "English Data Science" content into Arabic',
    inputs=gr.Textbox(lines=7, label="English content"),
    outputs="text",
    examples=[[
        "In the last few years, RNN-based architectures have shown the best "
        "performance on machine translation problems, but they still have some "
        "problems that had to be solved. First, they have difficulty coping with "
        "long-range dependencies (even LSTMs when they have to deal with really "
        "long sentences). Secondly, each hidden state depends on the previous "
        "one, which is impossible to parallelize and makes them inefficient on GPUs."
    ]],
)
translate_interface.launch(inline=False)
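
# Example interaction with the inference function, bypassing the UI
# (a sketch: the exact Arabic output depends on the fine-tuned weights,
# so no expected string is asserted here):
#
#     >>> translate_gradio("Transformers process all tokens in parallel.")
#     '<Arabic translation returned by the fine-tuned model>'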