File size: 2,492 Bytes
2b7cace
cc91c93
 
3ca116b
cc91c93
 
62a154b
 
3ca116b
 
cefb4b4
 
 
 
 
 
 
 
 
 
 
cc91c93
cefb4b4
 
518fac7
02bf3e1
cefb4b4
 
 
 
3ca116b
cefb4b4
 
02bf3e1
3ca116b
cefb4b4
 
 
 
3ca116b
b220179
 
 
 
 
 
 
 
 
 
 
 
 
 
cefb4b4
3ca116b
cefb4b4
d92a459
cefb4b4
 
02bf3e1
 
cefb4b4
 
 
 
 
27d3684
cd72ac7
cefb4b4
 
3ca116b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

# Checkpoint on the Hugging Face Hub (https://hf.co/models). It is named once
# so the model and its tokenizer are always loaded from the same checkpoint.
MODEL_NAME = "facebook/nllb-200-distilled-600M"

# Use the GPU when PyTorch can see one, otherwise run on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


# Maps the language name shown in the UI dropdowns (written in Persian:
# Persian, Kurdish, English) to the language code handed to the translation
# pipeline as src_lang / tgt_lang.
langs_dict = {
    "فارسی": "pes_Arab",
    "کردی": "ckb_Arab",
    "انگلیسی": "eng_Latn"
    }

# Supported language codes, derived from langs_dict so the two cannot diverge.
LANGS = list(langs_dict.values())

# Translation pipelines that have already been built, keyed by
# (source language code, target language code).
_PIPELINE_CACHE = {}


def _get_translation_pipeline(src_code, tgt_code):
    """Return the pipeline for one language pair, building it on first use."""
    key = (src_code, tgt_code)
    if key not in _PIPELINE_CACHE:
        _PIPELINE_CACHE[key] = pipeline(
            "translation",
            model=model,
            tokenizer=tokenizer,
            src_lang=src_code,
            tgt_lang=tgt_code,
            max_length=400,
            device=device,
        )
    return _PIPELINE_CACHE[key]


def translate(text, src_lang, tgt_lang):
    """
    Translate the text from source lang to target lang.

    Args:
        text: The text to translate.
        src_lang: Source language, given as a key of ``langs_dict``.
        tgt_lang: Target language, given as a key of ``langs_dict``.

    Returns:
        The translated text, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        KeyError: If ``src_lang`` or ``tgt_lang`` is not a key of
            ``langs_dict`` (and ``text`` is not blank).
    """
    # Nothing to translate: skip the model instead of feeding it empty input.
    if not text or not text.strip():
        return ""

    # Reuse the pipeline for this language pair rather than constructing a
    # new one on every call (this function is called once per line of a file).
    translation_pipeline = _get_translation_pipeline(
        langs_dict[src_lang], langs_dict[tgt_lang]
    )
    result = translation_pipeline(text)
    return result[0]['translation_text']

def file_translate(sorce_file_path, src_lang, tgt_lang, pred_file_path=None):
    """
    Translate a UTF-8 text file line by line and write the result to a file.

    Each input line is stripped of surrounding whitespace and translated with
    ``translate``; blank lines are copied through as empty lines without
    calling the model. One output line is written per input line.

    Args:
        sorce_file_path: Path of the file to translate (``str`` or
            ``os.PathLike``), or an object whose ``.name`` attribute holds
            that path (as uploaded-file objects from some Gradio versions do).
        src_lang: Source language, given as a key of ``langs_dict``.
        tgt_lang: Target language, given as a key of ``langs_dict``.
        pred_file_path: Where to write the translation. When ``None``, empty
            or whitespace-only, a new file is created in the system temp
            directory.

    Returns:
        The path of the file the translation was written to.
    """
    import os
    import tempfile

    # Accept either a plain path or an uploaded-file object exposing `.name`.
    if isinstance(sorce_file_path, (str, os.PathLike)):
        source_path = os.fspath(sorce_file_path)
    else:
        source_path = sorce_file_path.name

    # Read everything before writing, so the output path may safely be the
    # same file as the input.
    with open(source_path, "r", encoding="utf-8") as sorce_file:
        sorce_list = [line.strip() for line in sorce_file]

    pred_list = [
        translate(line, src_lang, tgt_lang) if line else ""
        for line in sorce_list
    ]

    # The output name is optional in the UI: an empty value would otherwise
    # reach open("") and fail, so generate a unique temp file instead.
    if not pred_file_path or not pred_file_path.strip():
        stem = os.path.splitext(os.path.basename(source_path))[0]
        handle, pred_file_path = tempfile.mkstemp(
            prefix=stem + "_translated_", suffix=".txt"
        )
        os.close(handle)

    with open(pred_file_path, "w", encoding="utf-8") as output_file:
        output_file.writelines(translation + "\n" for translation in pred_list)
    return pred_file_path

def add_line(input_path, output_path):
    """
    Copy a UTF-8 text file to ``output_path`` with one extra line appended.

    The appended line is a newline followed by the Persian greeting word
    "سلام". Returns ``output_path``.
    """
    # Load the whole input first, so input and output may be the same path.
    with open(input_path, encoding="utf-8") as source:
        original = source.read()

    # Write the original content back out, followed by the greeting line.
    with open(output_path, "w", encoding="utf-8") as destination:
        destination.write(original)
        destination.write("\nسلام")

    return output_path

if __name__ == '__main__':
    # Inputs, in the positional order file_translate expects them:
    # uploaded file, source language, target language, output file name.
    input_components = [
        gr.components.File(label="Input File"),
        gr.components.Dropdown(label="زبان مبدا", choices=list(langs_dict)),
        gr.components.Dropdown(label="زبان مقصد", choices=list(langs_dict)),
        gr.components.Textbox(label="Output File Name (optional)"),
    ]
    # The single output is the translated file returned by file_translate.
    output_components = [gr.components.File(label="Modified File")]

    interface = gr.Interface(
        fn=file_translate,
        inputs=input_components,
        outputs=output_components,
        title="NLLB 200 - (Translation Demo)",
        description="This Gradio demo translate text files. (CPU)",
    )
    interface.launch()