File size: 4,597 Bytes
71ad94c
af923d2
 
 
 
 
 
71ad94c
af923d2
71ad94c
 
 
af923d2
 
 
 
71ad94c
 
79868fd
 
 
af923d2
 
 
80ae4f4
 
af923d2
 
 
 
 
 
 
79868fd
af923d2
 
 
 
 
79868fd
af923d2
 
 
 
 
 
 
 
 
 
 
 
 
79868fd
af923d2
ef6d6f0
af923d2
 
ef6d6f0
af923d2
ef6d6f0
af923d2
 
ef6d6f0
af923d2
ef6d6f0
af923d2
 
ef6d6f0
 
af923d2
 
 
79868fd
af923d2
 
79868fd
af923d2
 
 
 
 
 
79868fd
af923d2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# Third-party dependencies.
import gradio as gr
from transformers import AutoProcessor
from transformers import SeamlessM4TForTextToText

# Language tables provided by the `lang_list` module.
from lang_list import (
    LANGUAGE_NAME_TO_CODE,
    T2TT_TARGET_LANGUAGE_NAMES,
    TEXT_SOURCE_LANGUAGE_NAMES,
)

# Language pre-selected in the "Target language" dropdown.
DEFAULT_TARGET_LANGUAGE = "English"

# One checkpoint id shared by the model and its processor, so the two cannot
# drift apart when the checkpoint is changed.
MODEL_CHECKPOINT = "facebook/hf-seamless-m4t-medium"

# Loaded once at import time and reused by every translation request.
model = SeamlessM4TForTextToText.from_pretrained(MODEL_CHECKPOINT)
processor = AutoProcessor.from_pretrained(MODEL_CHECKPOINT)

# text_inputs = processor(text = "Hello, my dog is cute", src_lang="eng", return_tensors="pt")
# output_tokens = model.generate(**text_inputs, tgt_lang="pan")
# translated_text_from_text = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
# print(translated_text_from_text)


def _read_uploaded_text(uploaded) -> str:
    """Return the UTF-8 text carried by a file-upload value.

    The upload may arrive as raw ``bytes``, as a filesystem path, as an
    object whose ``name`` attribute is the path of the stored file, or as a
    plain file-like object exposing ``read()``.
    """
    import os  # local import: only this helper needs it

    if isinstance(uploaded, (bytes, bytearray)):
        raw = bytes(uploaded)
    elif isinstance(uploaded, (str, os.PathLike)):
        with open(uploaded, "rb") as handle:
            raw = handle.read()
    else:
        stored_path = getattr(uploaded, "name", None)
        if isinstance(stored_path, str) and os.path.isfile(stored_path):
            # Prefer the on-disk file: a wrapper object may itself be an
            # empty handle that merely points at the stored upload.
            with open(stored_path, "rb") as handle:
                raw = handle.read()
        else:
            raw = uploaded.read()

    if isinstance(raw, str):
        return raw
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
    # would otherwise be passed to the tokenizer as part of the text.
    return raw.decode("utf-8-sig")


def run_t2tt(
    file_uploader,
    input_text: str,
    source_language: str,
    target_language: str,
) -> str:
    """Translate text from ``source_language`` into ``target_language``.

    Args:
        file_uploader: Optional uploaded text file. When it is not ``None``
            its content replaces ``input_text``. Accepted forms are raw
            bytes, a path, an object with a ``name`` path attribute, or a
            file-like object with ``read()``.
        input_text: Text to translate when no file is supplied.
        source_language: Language name, used as a key of
            ``LANGUAGE_NAME_TO_CODE``.
        target_language: Language name, used as a key of
            ``LANGUAGE_NAME_TO_CODE``.

    Returns:
        The decoded translation, or ``""`` when there is nothing to
        translate (missing, empty, or whitespace-only input).

    Raises:
        KeyError: If a language name is not in ``LANGUAGE_NAME_TO_CODE``.
        UnicodeDecodeError: If the uploaded file is not valid UTF-8.
    """
    if file_uploader is not None:
        input_text = _read_uploaded_text(file_uploader)

    # Nothing to translate: skip the model call instead of generating from
    # an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]

    text_inputs = processor(
        text=input_text,
        src_lang=source_language_code,
        return_tensors="pt",
    )
    output_tokens = model.generate(**text_inputs, tgt_lang=target_language_code)
    # Only the first generated sequence is decoded.
    output = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
    return str(output)


def _run_t2tt_from_upload(uploaded_bytes, input_text, source_language, target_language):
    """Call `run_t2tt`, wrapping raw uploaded bytes in a file-like object."""
    import io  # local import: only this adapter needs it

    uploaded_file = None if uploaded_bytes is None else io.BytesIO(uploaded_bytes)
    return run_t2tt(uploaded_file, input_text, source_language, target_language)


# Text-to-text translation UI: inputs and the trigger button on the left,
# the translated text on the right.
with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                # "txt" is not a value `gr.File` accepts for `type`;
                # "binary" is valid and delivers the upload as raw bytes.
                file_uploader = gr.File(label="Upload a text file (Optional)", type="binary")
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
                        value="Punjabi",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=T2TT_TARGET_LANGUAGE_NAMES,
                        value=DEFAULT_TARGET_LANGUAGE,
                    )
            btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")

    # Each example row is (text, source language, target language), so the
    # inputs list names exactly those three components. Including the file
    # component would shift every value one component to the left.
    gr.Examples(
        examples=[
            [
                "The sinister destruction of the holy Akal Takht and the ruthless massacre of thousands of innocent pilgrims had unmasked the deep-seated hatred and animosity that the Indian Government had been nurturing against Sikhs ever since independence",
                "English",
                "Punjabi",
            ],
            [
                "It contains. much useful information about administrative, revenue, judicial and ecclesiastical activities in various areas which, it is hoped, would supplement the information available in official records.",
                "English",
                "Hindi",
            ],
            [
                "दुनिया में बहुत सी अलग-अलग भाषाएं हैं और उनमें अपने वर्ण और शब्दों का भंडार होता है. इसमें में कुछ उनके अपने शब्द होते हैं तो कुछ ऐसे भी हैं, जो दूसरी भाषाओं से लिए जाते हैं.",
                "Hindi",
                "Punjabi",
            ],
            [
                "ਸੂੂਬੇ ਦੇ ਕਈ ਜ਼ਿਲ੍ਹਿਆਂ ’ਚ ਬੁੱਧਵਾਰ ਸਵੇਰੇ ਸੰਘਣੀ ਧੁੰਦ ਛਾਈ ਰਹੀ ਤੇ ਤੇਜ਼ ਹਵਾਵਾਂ ਨੇ ਕਾਂਬਾ ਹੋਰ ਵਧਾ ਦਿੱਤਾ। ਸੱਤ ਸ਼ਹਿਰਾਂ ’ਚ ਦਿਨ ਦਾ ਤਾਪਮਾਨ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੇ ਆਸਪਾਸ ਰਿਹਾ। ਸੂਬੇ ’ਚ ਵੱਧ ਤੋਂ ਵੱਧ ਤਾਪਮਾਨ ’ਚ ਵੀ ਦਸ ਡਿਗਰੀ ਸੈਲਸੀਅਸ ਦੀ ਗਿਰਾਵਟ ਦਰਜ ਕੀਤੀ ਗਈ",
                "Punjabi",
                "English",
            ],
        ],
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        # Examples never carry a file, so the upload argument is fixed to None.
        fn=lambda text, source, target: run_t2tt(None, text, source, target),
        cache_examples=False,
        api_name=False,
    )

    # Pressing Enter in the textbox and clicking the button both translate.
    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=_run_t2tt_from_upload,
        inputs=[file_uploader, input_text, source_language, target_language],
        outputs=output_text,
        api_name="t2tt",
    )

if __name__ == "__main__":
    demo_t2tt.launch()