walaa2022 committed on
Commit
52881b0
·
verified ·
1 Parent(s): da478b5

Upload 3 files

Browse files
Files changed (3) hide show
  1. english-only-app.py +214 -0
  2. packages.txt +4 -0
  3. requirements (5).txt +2 -0
english-only-app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ from datetime import datetime
4
+
5
+ import gradio
6
+ import sign_language_translator as slt
7
+
8
# Markdown shown under the page title; links point at the library and model code.
DESCRIPTION = """Enter your English text and click Submit to generate a sign language video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)
The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
> **NOTE**
> - This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
> - This version needs to re-encode the generated video so that will take some extra time after translation.
> - Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)).
""".strip()

# Page / browser-tab title.
TITLE = "English to Sign Language Translator"

# Extra CSS applied to the element with id "auto-complete-button".
CUSTOM_CSS = """
#auto-complete-button {
    border-color: var(--button-primary-border-color-hover);
}
"""

# Requests are logged to a Hugging Face dataset when a token is configured in
# the environment; otherwise they fall back to a CSV logger.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    request_logger = gradio.HuggingFaceDatasetSaver(
        HF_TOKEN,
        "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
    )
else:
    request_logger = gradio.CSVLogger()

# Single shared translator instance, reconfigured on every request.
translation_model = slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")

# Cache of text-completion samplers, keyed by model code and filled lazily.
language_models = {}
36
+
37
def auto_complete_text(model_code: str, text: str):
    """Extend ``text`` with a completion sampled from a language model.

    The sampler for ``model_code`` is built on first use and cached in the
    module-level ``language_models`` dict.

    Args:
        model_code: Model identifier passed to ``slt.get_model``.
        text: Text typed so far; may be empty.

    Returns:
        The completed text with any leading start-of-sequence token and
        trailing end-of-sequence token removed.
    """
    if model_code not in language_models:
        lm = slt.get_model(model_code)
        language_models[model_code] = slt.models.BeamSampling(
            lm,  # type: ignore
            start_of_sequence_token=getattr(lm, "start_of_sequence_token", "<"),  # type: ignore
            end_of_sequence_token=getattr(lm, "end_of_sequence_token", ">"),  # type: ignore
        )

    # Split on word boundaries and drop the empty pieces re.split produces.
    tokens = [w for w in re.split(r"\b", text or "") if w]
    lm = language_models[model_code]
    # Allow the model to add up to 10 tokens beyond the prompt.
    lm.max_length = len(tokens) + 10
    completion, _ = lm.complete(tokens or None)

    # Guard both checks: the completion may be empty, or may become empty once
    # the start token is stripped, and indexing it would raise IndexError.
    if completion and completion[0] == lm.start_of_sequence_token:  # type: ignore
        completion = completion[1:]  # type: ignore
    if completion and completion[-1] == lm.end_of_sequence_token:  # type: ignore
        completion = completion[:-1]  # type: ignore

    return "".join(completion)
57
+
58
+
59
def text_to_video(
    text: str,
    sign_language: str = "pakistan-sign-language",
    sign_format: str = "video",
    output_path: str = "output.mp4",
    codec="h264",  # ToDo: install h264 codec for opencv
):
    """Translate English ``text`` to sign language and write it to ``output_path``.

    Reconfigures the shared ``translation_model`` for this request, runs the
    translation, and saves the result either as a landmark animation or as a
    video file. Nothing is returned; the output is the file on disk.

    Args:
        text: English sentence to translate.
        sign_language: Target sign language code.
        sign_format: ``"video"`` or ``"landmarks"``.
        output_path: Destination file; overwritten if it already exists.
        codec: Codec passed to ``save`` for video output (unused for landmarks).
    """
    # The source language is always English in this app.
    translation_model.text_language = "en"
    translation_model.sign_language = sign_language
    translation_model.sign_format = sign_format
    if sign_format == "landmarks":
        translation_model.sign_embedding_model = "mediapipe-world"

    # Lower-case only the first character before translating.
    normalized = text[:1].lower() + text[1:]
    sign = translation_model.translate(normalized)

    if not isinstance(sign, slt.Landmarks):
        sign.save(output_path, overwrite=True, codec=codec)
        return

    # Move each hand so that its first landmark coincides with a pose wrist.
    # NOTE(review): presumably indices 0-32 are pose landmarks (15/16 being the
    # wrists), 33-53 one hand and 54+ the other - confirm against the
    # landmark layout used by the embedding model.
    first_hand_shift = -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
    sign.data[:, 33:54, :3] += first_hand_shift
    second_hand_shift = -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
    sign.data[:, 54:, :3] += second_hand_shift

    sign.save_animation(output_path, overwrite=True)
84
+
85
+
86
def translate(text: str, sign_lang: str, sign_format: str):
    """Gradio handler: translate ``text`` and return the generated file path.

    Every request is recorded through ``request_logger`` as
    ``[text, "en", sign_lang, exception message or None, timestamp]``.

    Args:
        text: English sentence from the input textbox.
        sign_lang: Selected sign language code.
        sign_format: Selected output format.

    Returns:
        Path of the generated file (``"output.mp4"``).

    Raises:
        gradio.Error: If the translation itself fails; the original exception
            is chained as the cause.
    """
    log = [
        text,
        "en",
        sign_lang,
        None,  # exception message, filled in only when translation fails
        datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
    ]
    path = "output.mp4"

    # Only the translation is wrapped in the try block, so a failure inside the
    # logger is not reported as a translation error and is not flagged twice.
    try:
        text_to_video(
            text,
            sign_language=sign_lang,
            sign_format=sign_format,
            output_path=path,
            codec="mp4v",
        )
    except Exception as exc:
        log[3] = str(exc)
        request_logger.flag(log)
        raise gradio.Error(f"Error during translation: {exc}") from exc

    request_logger.flag(log)
    return path
110
+
111
+
112
# Build the UI: inputs on the left, generated sign language video on the right.
with gradio.Blocks(title=TITLE, css=CUSTOM_CSS) as gradio_app:
    gradio.Markdown(f"# {TITLE}")
    gradio.Markdown(DESCRIPTION)
    with gradio.Row():
        with gradio.Column():  # Inputs
            gradio.Markdown("## Select Output Options")
            with gradio.Row():
                sign_lang_dropdown = gradio.Dropdown(
                    choices=[code.value for code in slt.SignLanguageCodes],
                    value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
                    label="Sign Language",
                )
                output_format_dropdown = gradio.Dropdown(
                    choices=[
                        slt.SignFormatCodes.VIDEO.value,
                        slt.SignFormatCodes.LANDMARKS.value,
                    ],
                    value=slt.SignFormatCodes.VIDEO.value,
                    label="Output Format",
                )

            gradio.Markdown("## Input English Text")
            with gradio.Row():  # Source TextArea
                source_textbox = gradio.Textbox(
                    lines=4,
                    placeholder="Enter English Text Here...",
                    label="English Sentence",
                    show_copy_button=True,
                    elem_id="source-textbox",
                )
            with gradio.Row():  # clear/auto-complete/Language Model
                # We'll keep the language model for English text completion
                language_model_dropdown = gradio.Dropdown(
                    choices=[
                        slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
                    ],
                    value=slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
                    label="Select language model to Generate sample text",
                )

                auto_complete_button = gradio.Button(
                    "Auto-Complete", elem_id="auto-complete-button"
                )
                auto_complete_button.click(
                    auto_complete_text,
                    inputs=[language_model_dropdown, source_textbox],
                    outputs=[source_textbox],
                    api_name=False,
                )
                clear_button = gradio.ClearButton(source_textbox, api_name=False)

            with gradio.Row():  # Translate Button
                translate_button = gradio.Button("Translate", variant="primary")

        with gradio.Column():  # Outputs
            gradio.Markdown("## Output Sign Language")
            output_video = gradio.Video(
                format="mp4",
                label="Synthesized Sign Language Video",
                autoplay=True,
                show_download_button=True,
                include_audio=False,
            )

    # The click handler is registered only after both columns exist: it refers
    # to output_video, which is created in the Outputs column. Registering it
    # next to the button raised NameError while the module was loading.
    translate_button.click(
        translate,
        inputs=[
            source_textbox,
            sign_lang_dropdown,
            output_format_dropdown,
        ],
        outputs=[output_video],
        api_name="translate",
    )

    gradio.Examples(
        [
            ["We are here to use this.", "pakistan-sign-language", "video"],
            ["I admire art.", "pakistan-sign-language", "landmarks"],
            ["This is very good.", "pakistan-sign-language", "video"],
            ["That work was easy.", "pakistan-sign-language", "landmarks"],
            ["How are you?", "pakistan-sign-language", "video"],
            ["Five hours.", "pakistan-sign-language", "landmarks"],
        ],
        inputs=[
            source_textbox,
            sign_lang_dropdown,
            output_format_dropdown,
        ],
        outputs=output_video,
    )
    # One component per column of the request log; the order matches the list
    # that translate() passes to request_logger.flag().
    request_logger.setup(
        [
            source_textbox,
            gradio.Markdown(label="Language", value="en"),
            sign_lang_dropdown,
            gradio.Markdown(label="Exception"),
            gradio.Markdown(label="Timestamp"),
        ],
        "flagged",
    )

if __name__ == "__main__":
    gradio_app.launch()
packages.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ x264
2
+ libx264-dev
3
+ ffmpeg
4
+ python3-opencv
requirements (5).txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ sign-language-translator==0.8.*
2
+ opencv-python