import gradio as gr
import torch
from transformers import AutoTokenizer

# sentencepiece is required by the Marian tokenizer; install it in the
# environment beforehand (e.g. pip install sentencepiece==0.1.95) rather
# than shelling out to pip at runtime.

# The tokenizer comes from the base Helsinki-NLP English-to-Arabic Marian
# checkpoint; the model is a fine-tuned copy stored as a fully pickled module.
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
model = torch.load("helsinki_fineTuned.pt", map_location=torch.device('cpu'))
model.eval()  # inference mode: disables dropout and similar training behavior
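# Note: torch.load() unpickles a complete nn.Module, so the environment must
# provide the same model class (presumably MarianMTModel from transformers)
# that was importable when helsinki_fineTuned.pt was saved.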
def translate_gradio(text):
    """Translate an English string into Arabic with the fine-tuned model."""
    # Tokenize the source text (prepare_seq2seq_batch is deprecated; calling
    # the tokenizer directly yields the same input_ids/attention_mask).
    tokenized_text = tokenizer([text], return_tensors='pt')
    # Generate the translation and decode it, dropping special tokens.
    with torch.no_grad():
        encoded = model.generate(**tokenized_text)
    text_ar = tokenizer.batch_decode(encoded, skip_special_tokens=True)[0]
    return text_ar
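# Alternative sketch (not used by this app): the commented-out `pipeline`
# import in the original code hints at transformers' high-level API. Assuming
# the fine-tuned model had instead been saved with save_pretrained() to a
# hypothetical "./helsinki_fineTuned" directory, the same translation could
# look like:
#
#   from transformers import pipeline
#   translator = pipeline("translation", model="./helsinki_fineTuned",
#                         tokenizer="Helsinki-NLP/opus-mt-en-ar")
#   text_ar = translator("Some English text")[0]["translation_text"]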
# Build the Gradio interface: a multi-line English text box in, plain text out.
translate_interface = gr.Interface(
    fn=translate_gradio,
    title='Translating "English Data Science" content into Arabic',
    inputs=gr.Textbox(lines=7, label='English content'),
    outputs='text',
    examples=[['In the last few years the RNN-based architectures have shown the best performance in machine translation problems, but they still have some problems that have to be solved. First, they have difficulty coping with long-range dependencies (as does the LSTM when it has to deal with really long sentences). Secondly, each hidden state depends on the previous one, which is impossible to parallelize and makes the model inefficient on GPUs.']],
)

translate_interface.launch(inline=False)
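# launch() starts a local web server and prints its URL; on Hugging Face
# Spaces the app is served automatically, while locally you can pass
# share=True to launch() for a temporary public link.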