mdsr committed on
Commit
106e748
·
1 Parent(s): 2061100

block layout with sample text generator

Browse files
Files changed (1) hide show
  1. app.py +160 -48
app.py CHANGED
@@ -1,25 +1,68 @@
1
  import os
 
 
 
2
 
3
  import gradio
4
  import sign_language_translator as slt
5
 
6
- description = """Enter your text and select languages from the dropdowns, then click Submit to generate a video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)
7
 
8
  The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
9
 
10
  > NOTE: This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words."""
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  HF_TOKEN = os.getenv("HF_TOKEN")
13
- hf_writer = (
14
  gradio.HuggingFaceDatasetSaver(
15
  HF_TOKEN,
16
  "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
17
  )
18
  if HF_TOKEN
19
- else None
20
  )
21
 
22
- model = slt.models.ConcatenativeSynthesis("ur", "pk-sl", "video")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  def text_to_video(
@@ -27,65 +70,134 @@ def text_to_video(
27
  text_language: str,
28
  sign_language: str,
29
  output_path: str = "output.mp4",
30
- codec="h264",
31
  ):
32
- model.text_language = text_language
33
- model.sign_language = sign_language
34
 
35
- video = model.translate(text)
36
  video.save(output_path, overwrite=True, codec=codec)
37
 
38
  # ToDo: video.watermark("Sign Language Translator\nAI Generated Video")
39
 
40
 
41
- def predict(text: str, text_lang: str, sign_lang: str):
 
 
 
 
 
 
 
42
  try:
43
  path = "output.mp4"
44
  text_to_video(text, text_lang, sign_lang, output_path=path, codec="mp4v")
 
45
  return path
 
46
  except Exception as exc:
 
 
47
  raise gradio.Error(f"Error during translation: {exc}")
48
 
49
 
50
- gradio_app = gradio.Interface(
51
- fn=predict,
52
- inputs=[
53
- gradio.Textbox(
54
- lines=2,
55
- placeholder="Enter Text Here...",
56
- label="Spoken Language Sentence",
57
- ),
58
- gradio.Dropdown(
59
- choices=[code.value for code in slt.TextLanguageCodes],
60
- value=slt.TextLanguageCodes.URDU.value,
61
- label="Text Language",
62
- ),
63
- gradio.Dropdown(
64
- choices=[code.value for code in slt.SignLanguageCodes],
65
- value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
66
- label="Sign Language",
67
- ),
68
- ], # type: ignore
69
- outputs=gradio.Video(
70
- format="mp4",
71
- label="Synthesized Sign Language Video",
72
- autoplay=True,
73
- show_download_button=True,
74
- include_audio=False,
75
- ),
76
- title="Concatenative Synthesis: Rule Based Text to Sign Language Translator",
77
- description=description,
78
- examples=[
79
- ["یہ بہت اچھا ہے۔", "ur", "pakistan-sign-language"],
80
- ["یہ کام بہت آسان ہے۔", "ur", "pakistan-sign-language"],
81
- ["पाँच घंटे।", "hi", "pakistan-sign-language"],
82
- # ["आप कैसे हैं?", "hi", "pakistan-sign-language"],
83
- ],
84
- allow_flagging="auto",
85
- flagging_callback=hf_writer,
86
- thumbnail="https://cdn-uploads.huggingface.co/production/uploads/6368b375fbfe97c16a401079/1hUEuDUvqCZM0fLVhIAT1.png",
87
- # cache_examples="lazy",
88
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  if __name__ == "__main__":
91
  gradio_app.launch()
 
1
  import os
2
+ import re
3
+ from datetime import datetime
4
+ from typing import Dict
5
 
6
  import gradio
7
  import sign_language_translator as slt
8
 
9
+ DESCRIPTION = """Enter your text and select languages from the dropdowns, then click Submit to generate a video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)
10
 
11
  The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
12
 
13
  > NOTE: This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words."""
14
 
15
+ TITLE = "Concatenative Synthesis: Rule Based Text to Sign Language Translator"
16
+
17
+ CUSTOM_JS = """<script>
18
+ const rtlLanguages = ["ur", "ar"];
19
+
20
+ function updateTextareaDir(language) {
21
+ const sourceTextarea = document.getElementById("source-textbox").querySelector("textarea");
22
+
23
+ if (rtlLanguages.includes(language)) {
24
+ sourceTextarea.setAttribute("dir", "rtl");
25
+ } else {
26
+ sourceTextarea.setAttribute("dir", "ltr");
27
+ }
28
+ }
29
+ </script>"""
30
+ # todo: add dropdown keyboard custom component with key mapping
31
+
32
  HF_TOKEN = os.getenv("HF_TOKEN")
33
+ request_logger = (
34
  gradio.HuggingFaceDatasetSaver(
35
  HF_TOKEN,
36
  "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
37
  )
38
  if HF_TOKEN
39
+ else gradio.CSVLogger()
40
  )
41
 
42
+ translation_model = slt.models.ConcatenativeSynthesis("ur", "pk-sl", "video")
43
+ language_models: Dict[str, slt.models.BeamSampling] = {}
44
+
45
+
46
def auto_complete_text(model_code: str, text: str) -> str:
    """Extend ``text`` with a continuation sampled from a language model.

    The sampler for ``model_code`` is created on first use and kept in the
    module-level ``language_models`` dict so later calls reuse it.

    Args:
        model_code: Identifier handed to ``slt.get_model`` to load the model.
        text: Text typed so far. May be empty; ``None`` is treated as empty.

    Returns:
        The tokens returned by the sampler joined into one string, with a
        leading start-of-sequence token and a trailing end-of-sequence token
        removed when present.
    """
    if model_code not in language_models:
        base_model = slt.get_model(model_code)
        language_models[model_code] = slt.models.BeamSampling(
            base_model,  # type: ignore
            start_of_sequence_token=getattr(base_model, "start_of_sequence_token", " "),  # type: ignore
        )
    sampler = language_models[model_code]

    # todo: better tokenize/detokenize
    # Splitting on word boundaries keeps whitespace/punctuation as their own
    # tokens, so "".join() below reproduces the original spacing.
    # `text or ""`: NOTE(review) a cleared textbox may deliver None - confirm.
    tokens = [piece for piece in re.split(r"\b", text or "") if piece]

    # presumably caps the output at 10 tokens beyond the prompt - TODO confirm
    sampler.max_length = len(tokens) + 10
    completion, _ = sampler.complete(tokens or None)

    # Check for emptiness before indexing: an empty completion (or one that
    # held only the start token) would otherwise raise IndexError.
    if completion and completion[0] == sampler.start_of_sequence_token:  # type: ignore
        completion = completion[1:]  # type: ignore
    if completion and completion[-1] == sampler.end_of_sequence_token:  # type: ignore
        completion = completion[:-1]  # type: ignore

    return "".join(completion)
66
 
67
 
68
  def text_to_video(
 
70
  text_language: str,
71
  sign_language: str,
72
  output_path: str = "output.mp4",
73
+ codec="h264", # ToDo: install h264 codec for opencv
74
  ):
75
+ translation_model.text_language = text_language
76
+ translation_model.sign_language = sign_language
77
 
78
+ video = translation_model.translate(text)
79
  video.save(output_path, overwrite=True, codec=codec)
80
 
81
  # ToDo: video.watermark("Sign Language Translator\nAI Generated Video")
82
 
83
 
84
def translate(text: str, text_lang: str, sign_lang: str) -> str:
    """Translate ``text`` into a sign language video and log the request.

    Every request is recorded through ``request_logger`` as
    ``[text, text_lang, sign_lang, exception message or None, timestamp]``.

    Args:
        text: Sentence to translate.
        text_lang: Code of the language ``text`` is written in.
        sign_lang: Code of the target sign language.

    Returns:
        Path of the video file that was written.

    Raises:
        gradio.Error: If the translation fails; the original exception is
            attached as ``__cause__``.
    """
    log = [
        text,
        text_lang,
        sign_lang,
        None,  # filled with the exception message when translation fails
        datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
    ]
    # NOTE(review): every request writes to the same file name; concurrent
    # requests could overwrite each other's output - confirm whether a
    # per-request path is needed.
    path = "output.mp4"

    # Only the translation itself is guarded, so a failure while logging a
    # successful request is not recorded/reported as a translation error.
    try:
        text_to_video(text, text_lang, sign_lang, output_path=path, codec="mp4v")
    except Exception as exc:
        log[3] = str(exc)
        request_logger.flag(log)
        raise gradio.Error(f"Error during translation: {exc}") from exc

    request_logger.flag(log)
    return path
102
 
103
 
104
+ with gradio.Blocks(title=TITLE, head=CUSTOM_JS) as gradio_app:
105
+ gradio.Markdown(f"# {TITLE}")
106
+ gradio.Markdown(DESCRIPTION)
107
+ with gradio.Row():
108
+ with gradio.Column():
109
+ gradio.Markdown("## Input Text")
110
+ with gradio.Row():
111
+ with gradio.Column():
112
+ source_textbox = gradio.Textbox(
113
+ lines=5,
114
+ placeholder="Enter Text Here...",
115
+ label="Spoken Language Sentence",
116
+ show_copy_button=True,
117
+ elem_id="source-textbox",
118
+ )
119
+ with gradio.Column():
120
+ gradio.Markdown("Generate sample text instead:")
121
+ with gradio.Row():
122
+ language_model_dropdown = gradio.Dropdown(
123
+ choices=[
124
+ slt.ModelCodes.MIXER_LM_NGRAM_URDU.value,
125
+ slt.ModelCodes.TRANSFORMER_LM_UR_SUPPORTED.value,
126
+ ],
127
+ value=slt.ModelCodes.MIXER_LM_NGRAM_URDU.value,
128
+ label="Language Model for auto-complete",
129
+ )
130
+ with gradio.Row():
131
+ clear_button = gradio.ClearButton(
132
+ source_textbox, api_name=False
133
+ )
134
+ auto_complete_button = gradio.Button("Auto-Complete")
135
+ auto_complete_button.click(
136
+ auto_complete_text,
137
+ inputs=[language_model_dropdown, source_textbox],
138
+ outputs=[source_textbox],
139
+ api_name=False,
140
+ )
141
+
142
+ gradio.Markdown("## Select Languages")
143
+ with gradio.Row():
144
+ text_lang_dropdown = gradio.Dropdown(
145
+ choices=[code.value for code in slt.TextLanguageCodes],
146
+ value=slt.TextLanguageCodes.URDU.value,
147
+ label="Text Language",
148
+ elem_id="text-lang-dropdown",
149
+ )
150
+ text_lang_dropdown.change(
151
+ None, inputs=text_lang_dropdown, js="updateTextareaDir"
152
+ )
153
+ sign_lang_dropdown = gradio.Dropdown(
154
+ choices=[code.value for code in slt.SignLanguageCodes],
155
+ value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
156
+ label="Sign Language",
157
+ )
158
+ # todo: sign format: video/landmarks (tabs?)
159
+
160
+ with gradio.Column():
161
+ gradio.Markdown("## Output Sign Language")
162
+ output_video = gradio.Video(
163
+ format="mp4",
164
+ label="Synthesized Sign Language Video",
165
+ autoplay=True,
166
+ show_download_button=True,
167
+ include_audio=False,
168
+ )
169
+
170
+ with gradio.Row():
171
+ translate_button = gradio.Button("Translate", variant="primary")
172
+ translate_button.click(
173
+ translate,
174
+ inputs=[source_textbox, text_lang_dropdown, sign_lang_dropdown],
175
+ outputs=[output_video],
176
+ api_name="translate",
177
+ )
178
+
179
+ gradio.Examples(
180
+ [
181
+ ["یہ بہت اچھا ہے۔", "ur", "pakistan-sign-language"],
182
+ ["وہ کام آسان تھا۔", "ur", "pakistan-sign-language"],
183
+ ["पाँच घंटे।", "hi", "pakistan-sign-language"],
184
+ # ["आप कैसे हैं?", "hi", "pakistan-sign-language"],
185
+ ],
186
+ inputs=[source_textbox, text_lang_dropdown, sign_lang_dropdown],
187
+ outputs=output_video,
188
+ )
189
+ request_logger.setup(
190
+ [
191
+ source_textbox,
192
+ text_lang_dropdown,
193
+ sign_lang_dropdown,
194
+ gradio.Markdown(label="Exception"),
195
+ gradio.Markdown(label="Timestamp"),
196
+ ],
197
+ "flagged",
198
+ )
199
+
200
+ gradio_app.load(None, inputs=[text_lang_dropdown], js="updateTextareaDir")
201
 
202
  if __name__ == "__main__":
203
  gradio_app.launch()