import os

import gradio
import sign_language_translator as slt

description = """Enter your text and select languages from the dropdowns, then click Submit to generate a video. [`Repository`](https://github.com/sign-language-translator/sign-language-translator)

The text is preprocessed, tokenized, and rearranged; each token is then mapped to a prerecorded video clip, and the clips are concatenated and returned. [`model code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)

> NOTE: This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words."""
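
# A minimal sketch of the library calls this app wraps (same API as used below):
#   model = slt.models.ConcatenativeSynthesis("ur", "pk-sl", "video")
#   sign = model.translate("یہ بہت اچھا ہے۔")
#   sign.save("output.mp4", overwrite=True, codec="h264")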

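# Save flagged inputs/outputs to a Hugging Face dataset when an access token is configured.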
HF_TOKEN = os.getenv("HF_TOKEN")
hf_writer = (
    gradio.HuggingFaceDatasetSaver(
        HF_TOKEN, "crowdsourced-text-to-sign-language-rule-based-translation-corpus"
    )
    if HF_TOKEN
    else None
)

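# Rule-based translator: each text token is looked up in the sign dictionary and the
# matching prerecorded clips are concatenated. Languages are switched per request below.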
model = slt.models.ConcatenativeSynthesis("ur", "pk-sl", "video")


def text_to_video(
    text: str,
    text_language: str,
    sign_language: str,
    output_path: str = "output.mp4",
    codec="h264",
):
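    # Point the shared model at the requested language pair before translating.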
    model.text_language = text_language
    model.sign_language = sign_language

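    # Translate the sentence and write the concatenated clip to disk.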
    video = model.translate(text)
    video.save(output_path, overwrite=True, codec=codec)

    # ToDo: video.watermark("Sign Language Translator\nAI Generated Video")


def predict(text: str, text_lang: str, sign_lang: str):
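    # Gradio handler: return the saved video path, or surface the failure in the UI.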
    try:
        path = "output.mp4"
        text_to_video(text, text_lang, sign_lang, output_path=path, codec="mp4v")
        return path
    except Exception as exc:
        raise gradio.Error(f"Error during translation: {exc}") from exc


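# UI: a text box and two language dropdowns in, a synthesized sign-language video out.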
gradio_app = gradio.Interface(
    fn=predict,
    inputs=[
        gradio.Textbox(
            lines=2,
            placeholder="Enter Text Here...",
            label="Spoken Language Sentence",
        ),
        gradio.Dropdown(
            choices=[code.value for code in slt.TextLanguageCodes],
            value=slt.TextLanguageCodes.URDU.value,
            label="Text Language",
        ),
        gradio.Dropdown(
            choices=[code.value for code in slt.SignLanguageCodes],
            value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
            label="Sign Language",
        ),
    ],  # type: ignore
    outputs=gradio.Video(
        format="mp4",
        label="Synthesized Sign Language Video",
        autoplay=True,
        show_download_button=True,
        include_audio=False,
    ),
    title="Concatenative Synthesis: Rule Based Text to Sign Language Translator",
    description=description,
    examples=[
        ["یہ بہت اچھا ہے۔", "ur", "pakistan-sign-language"],
        ["یہ کام بہت آسان ہے۔", "ur", "pakistan-sign-language"],
        ["पाँच घंटे।", "hi", "pakistan-sign-language"],
        # ["आप कैसे हैं?", "hi", "pakistan-sign-language"],
    ],
    # Flag automatically only when a dataset writer is configured (no HF_TOKEN, no flagging).
    allow_flagging="auto" if hf_writer else "never",
    flagging_callback=hf_writer,
    thumbnail="https://cdn-uploads.huggingface.co/production/uploads/6368b375fbfe97c16a401079/1hUEuDUvqCZM0fLVhIAT1.png",
    # cache_examples="lazy",
)

if __name__ == "__main__":
    gradio_app.launch()