walaa2022 committed
Commit 30e8ad8 · verified · 1 Parent(s): 9aa59b1

Update app.py

Files changed (1):
  1. app.py +130 -197
app.py CHANGED
@@ -1,214 +1,147 @@
  import os
- import re
- from datetime import datetime

- import gradio
- import sign_language_translator as slt
-
- DESCRIPTION = """Enter your English text and click Submit to generate a sign language video. [`Library Repository`](https://github.com/sign-language-translator/sign-language-translator)
- The text is preprocessed, tokenized and rearranged and then each token is mapped to a prerecorded video which are concatenated and returned. [`Model Code`](https://github.com/sign-language-translator/sign-language-translator/blob/main/sign_language_translator/models/text_to_sign/concatenative_synthesis.py)
- > **NOTE**
- > - This model only supports a fixed vocabulary. See the [`*-dictionary-mapping.json`](https://github.com/sign-language-translator/sign-language-datasets/tree/main/parallel_texts) files for supported words.
- > - This version needs to re-encode the generated video so that will take some extra time after translation.
- > - Since this is a rule-based model, you will have to add **context** to ambiguous words (e.g. glass(material) vs glass(container)).
- """.strip()

  TITLE = "English to Sign Language Translator"

- CUSTOM_CSS = """
- #auto-complete-button {
-     border-color: var(--button-primary-border-color-hover);
- }
  """

- HF_TOKEN = os.getenv("HF_TOKEN")
- request_logger = (
-     gradio.HuggingFaceDatasetSaver(
-         HF_TOKEN,
-         "sltAI/crowdsourced-text-to-sign-language-rule-based-translation-corpus",
-     )
-     if HF_TOKEN
-     else gradio.CSVLogger()
- )
-
- translation_model = slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
- language_models = {}
-
- def auto_complete_text(model_code: str, text: str):
-     if model_code not in language_models:
-         lm = slt.get_model(model_code)
-         language_models[model_code] = slt.models.BeamSampling(
-             lm,  # type: ignore
-             start_of_sequence_token=getattr(lm, "start_of_sequence_token", "<"),  # type: ignore
-             end_of_sequence_token=getattr(lm, "end_of_sequence_token", ">"),  # type: ignore
-         )
-
-     tokens = [w for w in re.split(r"\b", text) if w]
-     lm = language_models[model_code]
-     lm.max_length = len(tokens) + 10
-     completion, _ = lm.complete(tokens or None)
-     if completion[0] == lm.start_of_sequence_token:  # type: ignore
-         completion = completion[1:]  # type: ignore
-     if completion[-1] == lm.end_of_sequence_token:  # type: ignore
-         completion = completion[:-1]  # type: ignore
-     new_text = "".join(completion)
-
-     return new_text
-
-
- def text_to_video(
-     text: str,
-     sign_language: str = "pakistan-sign-language",
-     sign_format: str = "video",
-     output_path: str = "output.mp4",
-     codec="h264",  # ToDo: install h264 codec for opencv
- ):
-     translation_model.text_language = "en"  # Hardcoded to English
-     translation_model.sign_language = sign_language
-     translation_model.sign_format = sign_format
-     if sign_format == "landmarks":
-         translation_model.sign_embedding_model = "mediapipe-world"
-
-     # Convert first letter to lowercase as per original code
-     text = text[:1].lower() + text[1:]

-     sign = translation_model.translate(text)
-     if isinstance(sign, slt.Landmarks):
-         # hands moved to pose wrists
-         sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
-         sign.data[:, 54: , :3] += -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
-
-         sign.save_animation(output_path, overwrite=True)
-     else:
-         sign.save(output_path, overwrite=True, codec=codec)
-
-
- def translate(text: str, sign_lang: str, sign_format: str):
-     log = [
-         text,
-         "en",
-         sign_lang,
-         None,
-         datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f"),
-     ]
      try:
-         path = "output.mp4"
-         text_to_video(
-             text,
-             sign_language=sign_lang,
-             sign_format=sign_format,
-             output_path=path,
-             codec="mp4v",
-         )
-         request_logger.flag(log)
-         return path
-
-     except Exception as exc:
-         log[3] = str(exc)
-         request_logger.flag(log)
-         raise gradio.Error(f"Error during translation: {exc}")
-
-
- with gradio.Blocks(title=TITLE, css=CUSTOM_CSS) as gradio_app:
-     gradio.Markdown(f"# {TITLE}")
-     gradio.Markdown(DESCRIPTION)
-     with gradio.Row():
-         with gradio.Column():  # Inputs
-             gradio.Markdown("## Select Output Options")
-             with gradio.Row():
-                 sign_lang_dropdown = gradio.Dropdown(
-                     choices=[code.value for code in slt.SignLanguageCodes],
-                     value=slt.SignLanguageCodes.PAKISTAN_SIGN_LANGUAGE.value,
-                     label="Sign Language",
-                 )
-                 output_format_dropdown = gradio.Dropdown(
-                     choices=[
-                         slt.SignFormatCodes.VIDEO.value,
-                         slt.SignFormatCodes.LANDMARKS.value,
-                     ],
-                     value=slt.SignFormatCodes.VIDEO.value,
-                     label="Output Format",
-                 )
-
-             gradio.Markdown("## Input English Text")
-             with gradio.Row():  # Source TextArea
-                 source_textbox = gradio.Textbox(
-                     lines=4,
-                     placeholder="Enter English Text Here...",
-                     label="English Sentence",
-                     show_copy_button=True,
-                     elem_id="source-textbox",
-                 )
-             with gradio.Row():  # clear/auto-complete/Language Model
-                 # We'll keep the language model for English text completion
-                 language_model_dropdown = gradio.Dropdown(
-                     choices=[
-                         slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
-                     ],
-                     value=slt.ModelCodes.TRANSFORMER_LM_EN_SUPPORTED.value,
-                     label="Select language model to Generate sample text",
-                 )
-
-                 auto_complete_button = gradio.Button(
-                     "Auto-Complete", elem_id="auto-complete-button"
-                 )
-                 auto_complete_button.click(
-                     auto_complete_text,
-                     inputs=[language_model_dropdown, source_textbox],
-                     outputs=[source_textbox],
-                     api_name=False,
-                 )
-                 clear_button = gradio.ClearButton(source_textbox, api_name=False)
-
-             with gradio.Row():  # Translate Button
-                 translate_button = gradio.Button("Translate", variant="primary")
-                 translate_button.click(
-                     translate,
-                     inputs=[
-                         source_textbox,
-                         sign_lang_dropdown,
-                         output_format_dropdown,
-                     ],
-                     outputs=[output_video],
-                     api_name="translate",
-                 )
-
-         with gradio.Column():  # Outputs
-             gradio.Markdown("## Output Sign Language")
-             output_video = gradio.Video(
                  format="mp4",
-                 label="Synthesized Sign Language Video",
                  autoplay=True,
-                 show_download_button=True,
-                 include_audio=False,
              )
-
-     gradio.Examples(
-         [
-             ["We are here to use this.", "pakistan-sign-language", "video"],
-             ["I admire art.", "pakistan-sign-language", "landmarks"],
-             ["This is very good.", "pakistan-sign-language", "video"],
-             ["That work was easy.", "pakistan-sign-language", "landmarks"],
-             ["How are you?", "pakistan-sign-language", "video"],
-             ["Five hours.", "pakistan-sign-language", "landmarks"],
-         ],
-         inputs=[
-             source_textbox,
-             sign_lang_dropdown,
-             output_format_dropdown,
          ],
-         outputs=output_video,
      )
-     request_logger.setup(
-         [
-             source_textbox,
-             gradio.Markdown(label="Language", value="en"),
-             sign_lang_dropdown,
-             gradio.Markdown(label="Exception"),
-             gradio.Markdown(label="Timestamp"),
-         ],
-         "flagged",
      )

  if __name__ == "__main__":
-     gradio_app.launch()
 
  import os
+ import sys
+ import gradio as gr

+ # Install required packages if not already installed
+ try:
+     import sign_language_translator as slt
+ except ImportError:
+     print("Installing sign-language-translator...")
+     os.system("pip install sign-language-translator --quiet")
+     import sign_language_translator as slt

  TITLE = "English to Sign Language Translator"
+ DESCRIPTION = """Enter your English text and click Translate to generate a sign language video.
+ The text is preprocessed, tokenized and each token is mapped to a prerecorded video which are concatenated and returned.

+ **NOTE:**
+ - This model only supports a fixed vocabulary of common words
+ - First-time loading may take a moment as the model downloads
+ - For best results, use simple sentences with common words
  """

+ # Initialize the translation model (this may take some time on first run)
+ def get_model():
+     try:
+         return slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
+     except Exception as e:
+         print(f"Error initializing model: {str(e)}")
+         return None
+
+ # Global model variable
+ model = None
+
+ def initialize_model():
+     global model
+     if model is None:
+         model = get_model()
+     return model is not None
+
+ def translate_text(text, format_type):
+     """Translate English text to sign language video"""
+     if not text:
+         return None, "Please enter some text to translate."
+
+     # Initialize model if not already done
+     if not initialize_model():
+         return None, "Failed to initialize the translation model. Please try again."

      try:
+         # Format the text (lowercase first letter as required by model)
+         text = text[:1].lower() + text[1:] if text else ""
+
+         # Configure model
+         model.text_language = "en"
+         model.sign_language = "pk-sl"
+         model.sign_format = format_type
+
+         if format_type == "landmarks":
+             model.sign_embedding_model = "mediapipe-world"
+
+         # Translate
+         output_path = "output.mp4"
+         sign = model.translate(text)
+
+         # Save output
+         if isinstance(sign, slt.Landmarks):
+             # Position hands correctly
+             sign.data[:, 33:54, :3] += -sign.data[:, 33:34, :3] + sign.data[:, 15:16, :3]
+             sign.data[:, 54:, :3] += -sign.data[:, 54:55, :3] + sign.data[:, 16:17, :3]
+             sign.save_animation(output_path, overwrite=True)
+         else:
+             sign.save(output_path, overwrite=True, codec="mp4v")
+
+         return output_path, f"Successfully translated: '{text}'"
+
+     except Exception as e:
+         error_msg = str(e)
+         print(f"Translation error: {error_msg}")
+         return None, f"Error during translation: {error_msg}"
+
+ # Create the Gradio interface
+ with gr.Blocks(title=TITLE) as demo:
+     gr.Markdown(f"# {TITLE}")
+     gr.Markdown(DESCRIPTION)
+
+     with gr.Row():
+         with gr.Column():
+             # Input area
+             text_input = gr.Textbox(
+                 lines=4,
+                 placeholder="Enter English text here...",
+                 label="English Text"
+             )
+
+             format_dropdown = gr.Dropdown(
+                 choices=["video", "landmarks"],
+                 value="video",
+                 label="Output Format"
+             )
+
+             with gr.Row():
+                 clear_btn = gr.Button("Clear")
+                 translate_btn = gr.Button("Translate", variant="primary")
+
+             status_output = gr.Textbox(label="Status", interactive=False)
+
+         with gr.Column():
+             # Output video
+             video_output = gr.Video(
+                 label="Sign Language Output",
                  format="mp4",
                  autoplay=True,
+                 show_download_button=True
              )
+
+     # Examples
+     gr.Examples(
+         examples=[
+             ["Hello, how are you?", "video"],
+             ["My name is John.", "video"],
+             ["Nice to meet you.", "video"],
+             ["I want to learn sign language.", "video"]
          ],
+         inputs=[text_input, format_dropdown],
+         outputs=[video_output, status_output],
+         fn=translate_text
      )
+
+     # Event handlers
+     translate_btn.click(
+         fn=translate_text,
+         inputs=[text_input, format_dropdown],
+         outputs=[video_output, status_output]
+     )
+
+     clear_btn.click(
+         fn=lambda: ("", "Input cleared"),
+         inputs=None,
+         outputs=[text_input, status_output]
      )
+
+     # Initialize model on load (not blocking)
+     demo.load(lambda: None, None, None)

+ # Launch the app
  if __name__ == "__main__":
+     demo.launch()
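
For reference, the translation path used by the new app.py can also be exercised without the Gradio UI. Below is a minimal sketch that mirrors the ConcatenativeSynthesis calls from this commit; it assumes the sign-language-translator package is installed and that the prerecorded dictionary videos can be downloaded on first use.

# Minimal standalone sketch of the app's translation path (no Gradio UI).
# Assumption: `pip install sign-language-translator` and network access for
# the prerecorded dictionary clips on first run.
import sign_language_translator as slt

model = slt.models.ConcatenativeSynthesis("en", "pk-sl", "video")
model.text_language = "en"
model.sign_language = "pk-sl"
model.sign_format = "video"

text = "this is very good."   # first letter lowercased, as app.py does
sign = model.translate(text)  # each supported word maps to a prerecorded clip
sign.save("output.mp4", overwrite=True, codec="mp4v")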