File size: 3,154 Bytes
71ad94c
af923d2
 
 
 
 
 
71ad94c
af923d2
71ad94c
 
 
af923d2
 
 
 
71ad94c
 
af923d2
 
 
 
80ae4f4
 
af923d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import gradio as gr
from lang_list import (
    LANGUAGE_NAME_TO_CODE,
    T2TT_TARGET_LANGUAGE_NAMES,
    TEXT_SOURCE_LANGUAGE_NAMES,
)
DEFAULT_TARGET_LANGUAGE = "English"
from transformers import SeamlessM4TForTextToText
from transformers import AutoProcessor
model = SeamlessM4TForTextToText.from_pretrained("facebook/hf-seamless-m4t-medium")
processor = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")

# text_inputs = processor(text = "Hello, my dog is cute", src_lang="eng", return_tensors="pt")
# output_tokens = model.generate(**text_inputs, tgt_lang="pan")
# translated_text_from_text = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
# print(translated_text_from_text)


def run_t2tt(input_text: str, source_language: str, target_language: str) -> str:
    source_language_code = LANGUAGE_NAME_TO_CODE[source_language]
    target_language_code = LANGUAGE_NAME_TO_CODE[target_language]
    text_inputs = processor(text = input_text, src_lang=source_language_code , return_tensors="pt")
    output_tokens = model.generate(**text_inputs, tgt_lang=target_language_code)
    output = processor.decode(output_tokens[0].tolist(), skip_special_tokens=True)
    return str(output)



with gr.Blocks() as demo_t2tt:
    with gr.Row():
        with gr.Column():
            with gr.Group():
                input_text = gr.Textbox(label="Input text")
                with gr.Row():
                    source_language = gr.Dropdown(
                        label="Source language",
                        choices=TEXT_SOURCE_LANGUAGE_NAMES,
                        value="English",
                    )
                    target_language = gr.Dropdown(
                        label="Target language",
                        choices=T2TT_TARGET_LANGUAGE_NAMES,
                        value=DEFAULT_TARGET_LANGUAGE,
                    )
            btn = gr.Button("Translate")
        with gr.Column():
            output_text = gr.Textbox(label="Translated text")

    gr.Examples(
        examples=[
            [
                "My favorite animal is the elephant.",
                "English",
                "French",
            ],
            [
                "My favorite animal is the elephant.",
                "English",
                "Mandarin Chinese",
            ],
            [
                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
                "English",
                "Hindi",
            ],
            [
                "Meta AI's Seamless M4T model is democratising spoken communication across language barriers",
                "English",
                "Spanish",
            ],
        ],
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        fn=run_t2tt,
        cache_examples=True,
        api_name=False,
    )

    gr.on(
        triggers=[input_text.submit, btn.click],
        fn=run_t2tt,
        inputs=[input_text, source_language, target_language],
        outputs=output_text,
        api_name="t2tt",
    )

if __name__ == "__main__":
    demo_t2tt.launch()