walaa2022 committed on
Commit
80bb9b6
·
verified ·
1 Parent(s): 8273ab0

Upload multilingual-sign-app.py

Browse files
Files changed (1) hide show
  1. multilingual-sign-app.py +360 -0
multilingual-sign-app.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import requests
5
+ import json
6
+ from datetime import datetime
7
+ import tempfile
8
+ import uuid
9
+
10
# Third-party dependencies. If any of them is missing, install the whole set
# once and retry the imports.
try:
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
except ImportError:
    import subprocess

    print("Installing required packages...")
    # Run pip through the interpreter that is executing this script so the
    # packages land in the environment actually in use; a bare "pip" found on
    # PATH may belong to a different Python installation.
    # check=False keeps the original best-effort behavior: a failed install is
    # surfaced by the ImportError raised from the retried imports below.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install",
            "mediapipe", "opencv-python", "numpy", "googletrans==4.0.0-rc1",
            "--quiet",
        ],
        check=False,
    )
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
23
+
24
# Heading shown at the top of the Gradio page (also used as the page title).
TITLE = "Multilingual Sign Language Customer Assistant"

# Markdown text rendered under the heading. Joined line by line; the trailing
# empty entry yields the final newline.
DESCRIPTION = "\n".join(
    [
        "This app translates English or Arabic text into sign language videos for customer assistance.",
        "The system automatically detects the input language and generates appropriate sign language visuals.",
        "",
        "**Features:**",
        "- Supports both English and Arabic text",
        "- Uses SignDict for English sign language vocabulary",
        "- Uses ArSL for Arabic sign language",
        "- Perfect for customer service and assistance scenarios",
        "",
    ]
)
34
+
35
# Translation client shared by the whole module.
translator = Translator()

# Short aliases for the MediaPipe solution modules.
_mp_solutions = mp.solutions
mp_hands = _mp_solutions.hands
mp_drawing = _mp_solutions.drawing_utils
mp_drawing_styles = _mp_solutions.drawing_styles
mp_pose = _mp_solutions.pose
41
+
42
# SignDict - dictionary of common signs in both languages.
# Each row pairs an English entry with its Arabic counterpart. Both map to a
# clip named after the English entry (spaces replaced by underscores) under
# signs/<language>/.
# In a production app, these would link to pre-recorded videos or 3D animations
_SIGN_VOCABULARY = (
    ("hello", "مرحبا"),
    ("welcome", "أهلا وسهلا"),
    ("thank you", "شكرا"),
    ("help", "مساعدة"),
    ("yes", "نعم"),
    ("no", "لا"),
    ("please", "من فضلك"),
    ("wait", "انتظر"),
    ("sorry", "آسف"),
    ("how", "كيف"),
    ("what", "ماذا"),
    ("where", "أين"),
    ("when", "متى"),
    ("who", "من"),
    ("why", "لماذا"),
    ("customer", "عميل"),
    ("service", "خدمة"),
    ("support", "دعم"),
    ("information", "معلومات"),
    ("question", "سؤال"),
    ("answer", "إجابة"),
)

SIGN_DICT = {
    "en": {
        english: f"signs/en/{english.replace(' ', '_')}.mp4"
        for english, _ in _SIGN_VOCABULARY
    },
    "ar": {
        arabic: f"signs/ar/{english.replace(' ', '_')}.mp4"
        for english, arabic in _SIGN_VOCABULARY
    },
}
92
+
93
def detect_language(text):
    """Detect whether text is primarily English or Arabic.

    Letters are counted per script: ASCII A-Z/a-z count as English, and
    alphabetic characters in the Unicode Arabic blocks count as Arabic.
    Using the Unicode blocks instead of a hand-written letter list means
    letters such as "ى" are no longer missed. Digits, punctuation and
    diacritics are ignored by both counters.

    Args:
        text: The input string; may be empty or None.

    Returns:
        "ar" when Arabic letters outnumber English letters, "en" when there
        is at least one English letter otherwise (ties go to English), and
        "unknown" for empty input or input without letters of either script.
    """
    if not text:
        return "unknown"

    # Arabic, Arabic Supplement, and the two Presentation Forms blocks.
    arabic_ranges = (
        ("\u0600", "\u06ff"),
        ("\u0750", "\u077f"),
        ("\ufb50", "\ufdff"),
        ("\ufe70", "\ufeff"),
    )

    arabic_count = 0
    english_count = 0
    for char in text:
        if "a" <= char <= "z" or "A" <= char <= "Z":
            english_count += 1
        elif char.isalpha() and any(low <= char <= high for low, high in arabic_ranges):
            # isalpha() filters out the digits, punctuation and combining
            # marks that share these blocks.
            arabic_count += 1

    if arabic_count > english_count:
        return "ar"
    if english_count > 0:
        return "en"
    return "unknown"
111
+
112
def tokenize_text(text, language):
    """Split text into tokens that can be matched to signs.

    The text is split on whitespace. Leading and trailing punctuation is
    stripped from every word so that input such as "Hello," or "مرحبا،" can
    match a dictionary entry, and words that consist only of punctuation are
    dropped. Non-Arabic text is lower-cased first. Words are then grouped
    greedily: at each position the longest run of up to three words that is a
    key of SIGN_DICT[language] becomes one token; otherwise the single word
    is emitted unchanged.

    Args:
        text: The input sentence.
        language: Language code used to select the vocabulary, e.g. "en" or
            "ar". A code with no vocabulary yields single-word tokens.

    Returns:
        A list of word or phrase strings in input order; empty when the text
        contains no words.
    """
    import string

    max_phrase_words = 3
    # ASCII punctuation plus common Arabic and typographic marks.
    edge_punctuation = string.punctuation + "،؛؟«»…"

    if language != "ar":
        text = text.lower()

    stripped = (raw.strip(edge_punctuation) for raw in text.split())
    words = [word for word in stripped if word]
    if not words:
        return []

    vocabulary = SIGN_DICT.get(language, {})
    phrases = []
    i = 0
    while i < len(words):
        # Try the longest candidate phrase first and shrink down to one word.
        for size in range(min(max_phrase_words, len(words) - i), 0, -1):
            candidate = " ".join(words[i:i + size])
            if candidate in vocabulary:
                break
        else:
            # Nothing matched: keep the single word so the caller can still
            # show it.
            candidate, size = words[i], 1
        phrases.append(candidate)
        i += size
    return phrases
152
+
153
def translate_if_needed(text, source_lang, target_lang):
    """Return text in target_lang, translating only when the languages differ.

    Args:
        text: The string to translate.
        source_lang: Language code of text.
        target_lang: Language code wanted by the caller.

    Returns:
        The translated string. The original text is returned unchanged when
        both codes are equal or when the translation call raises; in the
        latter case the error is printed.
    """
    if source_lang != target_lang:
        try:
            result = translator.translate(text, src=source_lang, dest=target_lang)
            return result.text
        except Exception as e:
            # Best effort: report the failure and fall through to the
            # untranslated text.
            print(f"Translation error: {str(e)}")
    return text
164
+
165
def generate_default_sign_video(text, output_path, language="en"):
    """Generate a simple video showing the text when no sign is available.

    Writes a 2-second, 30 fps, 640x480 clip of white text centered on a black
    background.

    Args:
        text: Caption to draw on every frame.
        output_path: Destination file path for the video.
        language: Accepted for signature parity with the other generators;
            not used by this function.

    Returns:
        output_path, after the video has been written.

    Raises:
        RuntimeError: If the video writer cannot be opened for output_path.
    """
    height, width = 480, 640
    fps = 30
    seconds = 2

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)  # White
    thickness = 2

    # Center the text horizontally and vertically.
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    # Every frame is identical, so render it once and write it repeatedly.
    # NOTE(review): OpenCV's Hershey fonts cover a limited glyph set, so
    # Arabic text is unlikely to render legibly here - confirm and consider a
    # different text renderer.
    frame = np.zeros((height, width, 3), dtype=np.uint8)
    cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, thickness)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise RuntimeError(f"Could not open video writer for {output_path}")
        for _ in range(fps * seconds):
            video.write(frame)
    finally:
        # Release the writer even if writing fails so the file handle is not
        # leaked.
        video.release()
    return output_path
195
+
196
def create_avatar_animation(text, output_path, language="en"):
    """Create a placeholder avatar animation for the sign (simplified version).

    In a real implementation this would use a 3D avatar system. Here a
    3-second, 30 fps, 640x480 clip is drawn: a grey head and body on a dark
    blue background, two hand circles that each trace one full loop, and the
    sign text as a caption near the bottom.

    Args:
        text: Caption drawn on every frame.
        output_path: Destination file path for the video.
        language: When "ar", an "RTL" marker is drawn in the top-right corner.

    Returns:
        output_path, after the video has been written.

    Raises:
        RuntimeError: If the video writer cannot be opened for output_path.
    """
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds
    frames = fps * duration

    font = cv2.FONT_HERSHEY_SIMPLEX
    avatar_color = (200, 200, 200)
    white = (255, 255, 255)
    center_x, center_y = width // 2, height // 2

    # Everything except the hands is identical on every frame, so draw it
    # once. The hands never overlap the caption or the RTL marker, so drawing
    # those first does not change the resulting pixels.
    base = np.full((height, width, 3), (100, 60, 20), dtype=np.uint8)
    cv2.rectangle(base, (center_x - 50, center_y - 100), (center_x + 50, center_y + 100), avatar_color, -1)
    cv2.circle(base, (center_x, center_y - 150), 50, avatar_color, -1)
    cv2.putText(base, text, (center_x - 100, height - 50), font, 1, white, 2)
    if language == "ar":
        # Right-to-left indicator
        cv2.putText(base, "RTL", (width - 70, 30), font, 0.7, white, 1)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise RuntimeError(f"Could not open video writer for {output_path}")

        for i in range(frames):
            frame = base.copy()

            # One full cycle over the clip; the hands mirror each other
            # horizontally and share the same height.
            angle = (i / frames) * 2 * np.pi
            x_offset = 50 * np.sin(angle)
            hand_y = int(center_y - 50 * np.cos(angle))
            left_x = int(center_x - 100 - x_offset)
            right_x = int(center_x + 100 + x_offset)

            cv2.circle(frame, (left_x, hand_y), 20, avatar_color, -1)
            cv2.circle(frame, (right_x, hand_y), 20, avatar_color, -1)

            video.write(frame)
    finally:
        # Release the writer even if drawing or writing fails.
        video.release()
    return output_path
243
+
244
def generate_sign_video(tokens, language, output_format="3D"):
    """Generate a sign language video for the given tokens.

    In a real implementation this would concatenate the sign clips for all
    tokens. This demo renders only the first token: an avatar animation when
    the token is in SIGN_DICT for the language, otherwise a plain text video.

    Args:
        tokens: Non-empty sequence of word/phrase strings.
        language: Language code used to look up the vocabulary.
        output_format: Accepted for interface compatibility; not used by this
            function.

    Returns:
        Path of the generated .mp4 file in the system temporary directory.

    Raises:
        ValueError: If tokens is empty.
    """
    if not tokens:
        raise ValueError("tokens must contain at least one entry")

    first_token = tokens[0]
    # A unique file name per call keeps separate requests from overwriting
    # each other.
    output_path = os.path.join(tempfile.gettempdir(), f"sign_output_{uuid.uuid4()}.mp4")

    if first_token in SIGN_DICT.get(language, {}):
        # In a real implementation, this would load the video file.
        create_avatar_animation(first_token, output_path, language)
    else:
        # No sign known for this token: fall back to a text-only video.
        generate_default_sign_video(first_token, output_path, language)

    return output_path
261
+
262
def translate_to_sign(text, output_format="3D"):
    """Translate text into a sign language video (main entry point for the UI).

    Args:
        text: English or Arabic input text.
        output_format: Avatar style, passed through to generate_sign_video.

    Returns:
        A (video_path, status) tuple. video_path is None when nothing was
        generated: empty input gives an empty status, and otherwise status
        explains why (undetected language, no tokens, or an error message).
    """
    if not text:
        return None, ""

    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."

    try:
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."

        video_path = generate_sign_video(tokens, language, output_format)
        status = (
            f"Translated English: \"{text}\" to sign language."
            if language == "en"
            else f"Translated Arabic: \"{text}\" to sign language."
        )
        return video_path, status
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # letting it propagate.
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"
293
+
294
# Create the Gradio interface: text input and options on the left, the
# generated video on the right.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )

            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")

            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )

    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?"],
            ["Please wait while I check your account."],
            ["Thank you for your patience."],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟"],
            ["من فضلك انتظر بينما أتحقق من حسابك."],
            ["شكرا لصبرك."]
        ],
        inputs=[text_input],
        outputs=[video_output, status_output],
        fn=lambda text: translate_to_sign(text)
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )

    # Clear resets the text box and also removes the previously generated
    # video, so a stale result is not left on screen next to an empty input.
    clear_btn.click(
        fn=lambda: ("", None, "Input cleared"),
        inputs=None,
        outputs=[text_input, video_output, status_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()