Spaces:
Sleeping
Sleeping
Commit
·
de6ea7c
1
Parent(s):
9714f44
Update app.py
Browse files
app.py
CHANGED
@@ -1,41 +1,22 @@
|
|
1 |
import sys
|
2 |
-
#import subprocess
|
3 |
-
#from torch.utils.checkpoint import checkpoint
|
4 |
-
# implement pip as a subprocess:
|
5 |
-
#subprocess.check_call([sys.executable, '-m', 'pip', 'install','--quiet','sentencepiece==0.1.95'])
|
6 |
-
|
7 |
import gradio as gr
|
8 |
-
#from transformers import pipeline
|
9 |
from transformers import AutoTokenizer
|
10 |
import torch
|
11 |
|
12 |
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
|
13 |
model = torch.load("helsinki_fineTuned.pt", map_location=torch.device('cpu'))
|
14 |
model.eval()
|
15 |
-
#translation_pipeline = pipeline(model)
|
16 |
|
17 |
|
18 |
def translate_gradio(input):
|
19 |
-
|
20 |
-
with tokenizer.as_target_tokenizer():
|
21 |
-
input_ids = tokenizer(input, return_tensors='pt')
|
22 |
-
encode = model.generate(**input_ids)
|
23 |
-
# encode = model.generate(**tokenizer.prepare_seq2seq_batch(input,return_tensors='pt'))
|
24 |
-
text_ar = tokenizer.batch_decode(encode,skip_special_tokens=True)[0]'''
|
25 |
-
|
26 |
tokenized_text = tokenizer.prepare_seq2seq_batch([input], return_tensors='pt')
|
27 |
-
|
28 |
-
# Perform translation and decode the output
|
29 |
encode = model.generate(**tokenized_text)
|
30 |
text_ar = tokenizer.batch_decode(encode,skip_special_tokens=True)[0]
|
31 |
return text_ar
|
32 |
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
#description = 'Translating "English Data Science" content into Arabic'
|
38 |
translate_interface = gr.Interface(fn = translate_gradio,
|
|
|
39 |
title = 'Translating "English Data Science" content into Arabic',
|
40 |
inputs=gr.inputs.Textbox(lines = 7, label = 'english content'),
|
41 |
outputs="text",
|
|
|
1 |
import sys
|
|
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
|
|
3 |
from transformers import AutoTokenizer
|
4 |
import torch
|
5 |
|
6 |
# Tokenizer is loaded by name from the "Helsinki-NLP/opus-mt-en-ar" checkpoint
# (presumably English -> Arabic, going by the name and the UI title).
tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
|
7 |
# Load the model object from a local file, mapping its tensors onto the CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only ship a checkpoint from a trusted source alongside this app.
model = torch.load("helsinki_fineTuned.pt", map_location=torch.device('cpu'))
|
8 |
# Put the loaded model in evaluation mode before serving requests
# (assumes the pickled object is a torch.nn.Module -- TODO confirm).
model.eval()
|
|
|
9 |
|
10 |
|
11 |
def translate_gradio(input):
    """Translate one passage of text with the module-level model.

    Uses the module-level ``tokenizer`` and ``model``; going by the
    checkpoint name and the UI title, this is English -> Arabic.

    Args:
        input: Source text from the Gradio textbox. The name shadows the
            builtin but is kept so existing keyword callers keep working.

    Returns:
        The decoded translation of the passage, or an empty string when
        the input is ``None``, empty, or only whitespace.
    """
    # Nothing to translate: skip the model call instead of decoding whatever
    # the model would generate for an empty source sequence.
    if not input or not input.strip():
        return ""

    # Call the tokenizer directly; prepare_seq2seq_batch is deprecated in
    # transformers. padding/truncation mirror what that helper applied to
    # source-only input, so the encoded batch is unchanged.
    tokenized_text = tokenizer(
        [input], return_tensors='pt', padding='longest', truncation=True
    )
    encode = model.generate(**tokenized_text)
    # A single sequence went in, so the first decoded entry is the result.
    text_ar = tokenizer.batch_decode(encode, skip_special_tokens=True)[0]
    return text_ar
|
17 |
|
|
|
|
|
|
|
|
|
|
|
18 |
translate_interface = gr.Interface(fn = translate_gradio,
|
19 |
+
allow_flagging = True,
|
20 |
title = 'Translating "English Data Science" content into Arabic',
|
21 |
inputs=gr.inputs.Textbox(lines = 7, label = 'english content'),
|
22 |
outputs="text",
|