File size: 2,492 Bytes
2b7cace cc91c93 3ca116b cc91c93 62a154b 3ca116b cefb4b4 cc91c93 cefb4b4 518fac7 02bf3e1 cefb4b4 3ca116b cefb4b4 02bf3e1 3ca116b cefb4b4 3ca116b b220179 cefb4b4 3ca116b cefb4b4 d92a459 cefb4b4 02bf3e1 cefb4b4 27d3684 cd72ac7 cefb4b4 3ca116b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import os
import tempfile

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
# The checkpoint below was picked from https://hf.co/models
# Run on the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the NLLB-200 distilled checkpoint once at import time and move it to
# the selected device; the tokenizer comes from the same checkpoint.
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")

# NLLB language codes handled by this demo.
LANGS = ["pes_Arab", "ckb_Arab", "eng_Latn"]

# Display name shown in the UI -> NLLB language code.
langs_dict = {
    "فارسی": "pes_Arab",
    "کردی": "ckb_Arab",
    "انگلیسی": "eng_Latn",
}
def translate(text, src_lang, tgt_lang):
"""
Translate the text from source lang to target lang
"""
translation_pipeline = pipeline("translation", model=model, tokenizer=tokenizer, src_lang=langs_dict[src_lang], tgt_lang=langs_dict[tgt_lang], max_length=400, device=device)
result = translation_pipeline(text)
return result[0]['translation_text']
def file_translate(sorce_file_path, src_lang, tgt_lang, pred_file_path=None):
    """
    Translate a UTF-8 text file line by line and write the result to a file.

    Each input line is stripped of surrounding whitespace and passed to
    ``translate``; blank lines are kept as blank lines in the output.

    Args:
        sorce_file_path: Path of the file to translate, or an object whose
            ``name`` attribute holds that path (such as an uploaded-file
            wrapper).
        src_lang: Source language, passed through to ``translate``.
        tgt_lang: Target language, passed through to ``translate``.
        pred_file_path: Path of the output file. When ``None`` or blank, a
            new temporary ``.txt`` file is created and used instead.

    Returns:
        The path of the file the translations were written to.
    """
    # Some Gradio versions are understood to pass the upload as a
    # tempfile-like object rather than a path string; take the path from
    # ``.name`` then. Real path types are used directly, because
    # ``pathlib.Path.name`` is only the final path component.
    if isinstance(sorce_file_path, (str, bytes, os.PathLike)):
        source_path = sorce_file_path
    else:
        source_path = getattr(sorce_file_path, "name", sorce_file_path)

    with open(source_path, "r", encoding="utf-8") as sorce_file:
        source_lines = [line.strip() for line in sorce_file]

    # Translate everything before touching the output, so a failure does not
    # leave a partial file behind. Blank lines have nothing to translate.
    translations = [
        translate(line, src_lang, tgt_lang) if line else ""
        for line in source_lines
    ]

    # The output name is optional: fall back to a unique temporary file.
    if pred_file_path is None or not str(pred_file_path).strip():
        fd, pred_file_path = tempfile.mkstemp(prefix="translated_", suffix=".txt")
        os.close(fd)
    # NOTE(review): a caller-supplied pred_file_path is written as-is. If it
    # comes from an untrusted web form, consider confining it to a safe
    # directory.

    with open(pred_file_path, "w", encoding="utf-8") as output_file:
        output_file.writelines(f"{translation}\n" for translation in translations)
    return pred_file_path
def add_line(input_path, output_path):
    """
    Copy a UTF-8 text file and append one extra line to the copy.

    The whole content of ``input_path`` is written to ``output_path``,
    followed by a newline and the word "سلام".

    Returns:
        ``output_path``, unchanged.
    """
    # Read the whole input file first.
    with open(input_path, encoding="utf-8") as source:
        original = source.read()

    # Write the original content, then the extra trailing line.
    with open(output_path, "w", encoding="utf-8") as target:
        target.write(original)
        target.write("\nسلام")

    return output_path
if __name__ == '__main__':
    # Input widgets, in the order of file_translate's parameters:
    # uploaded file, source language, target language, output file name.
    input_widgets = [
        gr.components.File(label="Input File"),
        gr.components.Dropdown(label="زبان مبدا", choices=list(langs_dict)),
        gr.components.Dropdown(label="زبان مقصد", choices=list(langs_dict)),
        gr.components.Textbox(label="Output File Name (optional)"),
    ]

    # The single output is the file path returned by file_translate.
    output_widgets = [
        gr.components.File(label="Modified File"),
    ]

    interface = gr.Interface(
        fn=file_translate,
        inputs=input_widgets,
        outputs=output_widgets,
        title="NLLB 200 - (Translation Demo)",
        description="This Gradio demo translate text files. (CPU)",
    )
    interface.launch()
|