walaa2022 committed on
Commit c2dfdef · verified · 1 Parent(s): d5cc7a1

Update app.py

Files changed (1)
  1. app.py +351 -282
app.py CHANGED
@@ -1,117 +1,163 @@
  import os
  import sys
  import gradio as gr
- import requests
- import json
- from datetime import datetime
  import tempfile
  import uuid
- import re

- # Install required packages if not already installed
  try:
-     import mediapipe as mp
-     import cv2
-     import numpy as np
  except ImportError:
-     print("Installing required packages...")
-     os.system("pip install mediapipe opencv-python numpy --quiet")
-     import mediapipe as mp
-     import cv2
-     import numpy as np

- TITLE = "Multilingual Sign Language Customer Assistant"
- DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
- The system automatically detects the input language and generates appropriate sign language visuals.

  **Features:**
- - Supports both English and Arabic text
- - Uses 3D avatar technology to generate sign language
- - Perfect for customer service and assistance scenarios
  """

  # Initialize MediaPipe
  mp_hands = mp.solutions.hands
  mp_drawing = mp.solutions.drawing_utils
  mp_drawing_styles = mp.solutions.drawing_styles
- mp_pose = mp.solutions.pose

- # Dictionary of translations for common customer service phrases
  TRANSLATIONS = {
      "hello": "مرحبا",
      "welcome": "أهلا وسهلا",
      "thank you": "شكرا",
      "help": "مساعدة",
      "yes": "نعم",
      "no": "لا",
-     "please": "من فضلك",
-     "wait": "انتظر",
-     "sorry": "آسف",
      "how can i help you": "كيف يمكنني مساعدتك",
      "customer": "عميل",
      "service": "خدمة",
-     "support": "دعم",
-     "information": "معلومات",
-     "question": "سؤال",
-     "answer": "إجابة",
  }

- # SignDict - dictionary of common signs in both languages
- # In a production app, these would link to pre-recorded videos or 3D animations
- SIGN_DICT = {
-     "en": {
-         "hello": "signs/en/hello.mp4",
-         "welcome": "signs/en/welcome.mp4",
-         "thank you": "signs/en/thank_you.mp4",
-         "help": "signs/en/help.mp4",
-         "yes": "signs/en/yes.mp4",
-         "no": "signs/en/no.mp4",
-         "please": "signs/en/please.mp4",
-         "wait": "signs/en/wait.mp4",
-         "sorry": "signs/en/sorry.mp4",
-         "how": "signs/en/how.mp4",
-         "what": "signs/en/what.mp4",
-         "where": "signs/en/where.mp4",
-         "when": "signs/en/when.mp4",
-         "who": "signs/en/who.mp4",
-         "why": "signs/en/why.mp4",
-         "customer": "signs/en/customer.mp4",
-         "service": "signs/en/service.mp4",
-         "support": "signs/en/support.mp4",
-         "information": "signs/en/information.mp4",
-         "question": "signs/en/question.mp4",
-         "answer": "signs/en/answer.mp4",
-     },
-     "ar": {
-         "مرحبا": "signs/ar/hello.mp4",
-         "أهلا وسهلا": "signs/ar/welcome.mp4",
-         "شكرا": "signs/ar/thank_you.mp4",
-         "مساعدة": "signs/ar/help.mp4",
-         "نعم": "signs/ar/yes.mp4",
-         "لا": "signs/ar/no.mp4",
-         "من فضلك": "signs/ar/please.mp4",
-         "انتظر": "signs/ar/wait.mp4",
-         "آسف": "signs/ar/sorry.mp4",
-         "كيف": "signs/ar/how.mp4",
-         "ماذا": "signs/ar/what.mp4",
-         "أين": "signs/ar/where.mp4",
-         "متى": "signs/ar/when.mp4",
-         "من": "signs/ar/who.mp4",
-         "لماذا": "signs/ar/why.mp4",
-         "عميل": "signs/ar/customer.mp4",
-         "خدمة": "signs/ar/service.mp4",
-         "دعم": "signs/ar/support.mp4",
-         "معلومات": "signs/ar/information.mp4",
-         "سؤال": "signs/ar/question.mp4",
-         "إجابة": "signs/ar/answer.mp4",
-     }
- }

  def detect_language(text):
-     """Detect if text is primarily English or Arabic"""
      if not text:
          return "unknown"
-
      # Simple detection by character set
      arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
      english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
@@ -126,230 +172,253 @@ def detect_language(text):
      else:
          return "unknown"

- def translate_text(text, source_lang, target_lang):
-     """Simple dictionary-based translation"""
-     if source_lang == target_lang:
-         return text
-
-     # Convert to lowercase for matching
-     text_lower = text.lower()
-
-     # For English to Arabic
-     if source_lang == "en" and target_lang == "ar":
-         for eng, ar in TRANSLATIONS.items():
-             text_lower = text_lower.replace(eng, ar)
-         return text_lower

-     # For Arabic to English
-     if source_lang == "ar" and target_lang == "en":
-         for eng, ar in TRANSLATIONS.items():
-             text_lower = text_lower.replace(ar, eng)
-         return text_lower

-     return text  # Return original if no translation path
-
- def tokenize_text(text, language):
-     """Split text into tokens that can be matched to signs"""
-     if language == "ar":
-         # Arabic tokenization
-         tokens = text.split()
-         # Check for phrases
-         phrases = []
-         i = 0
-         while i < len(tokens):
-             # Try to match longest phrases first
-             matched = False
-             for j in range(min(3, len(tokens) - i), 0, -1):
-                 phrase = " ".join(tokens[i:i+j])
-                 if phrase in SIGN_DICT[language]:
-                     phrases.append(phrase)
-                     i += j
-                     matched = True
-                     break
-             if not matched:
-                 phrases.append(tokens[i])
-                 i += 1
-         return phrases
      else:
-         # English tokenization
-         tokens = text.lower().split()
-         phrases = []
-         i = 0
-         while i < len(tokens):
-             matched = False
-             for j in range(min(3, len(tokens) - i), 0, -1):
-                 phrase = " ".join(tokens[i:i+j])
-                 if phrase in SIGN_DICT[language]:
-                     phrases.append(phrase)
-                     i += j
-                     matched = True
-                     break
-             if not matched:
-                 phrases.append(tokens[i])
-                 i += 1
-         return phrases

- def generate_default_sign_video(text, output_path, language="en"):
-     """Generate a simple video with the text when no sign is available"""
-     # Create a black frame with text
-     height, width = 480, 640
-     fps = 30
-     seconds = 2

-     # Create a VideoWriter object
-     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-     video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-     # Create frames with text
-     font = cv2.FONT_HERSHEY_SIMPLEX
-     font_scale = 1
-     font_color = (255, 255, 255)  # White
-     line_type = 2
-
-     # Text positioning
-     text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-     text_x = (width - text_size[0]) // 2
-     text_y = (height + text_size[1]) // 2

-     # Write frames
-     for _ in range(fps * seconds):
-         frame = np.zeros((height, width, 3), dtype=np.uint8)
-         cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
-         video.write(frame)

-     video.release()
-     return output_path

- def create_avatar_animation(text, output_path, language="en", style="3D"):
-     """Create a 3D avatar animation for the sign (simplified version)"""
-     width, height = 640, 480
-     fps = 30
-     duration = 3  # seconds
-
-     # Create video writer
-     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-     video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-     # Create a simple animation with hands
-     frames = fps * duration
-     for i in range(frames):
-         # Create a background based on style
-         if style == "3D":
-             # Create a gradient background
-             frame = np.zeros((height, width, 3), dtype=np.uint8)
-             for y in range(height):
-                 for x in range(width):
-                     frame[y, x] = [
-                         int(100 + 50 * (x / width)),
-                         int(60 + 30 * (y / height)),
-                         int(120 + 40 * ((x+y) / (width+height)))
-                     ]
-         else:
-             # Simple solid background for 2D
-             frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([240, 240, 240], dtype=np.uint8)

-         # Draw a simple avatar
-         if style == "3D":
-             # 3D-style avatar
-             # Body
-             cv2.rectangle(frame, (width//2-50, height//2-100), (width//2+50, height//2+100), (200, 200, 200), -1)
-             # Head
-             cv2.circle(frame, (width//2, height//2-150), 50, (200, 200, 200), -1)
-
-             # Animate hands based on frame number
-             t = i / frames
-             # Left hand movement
-             x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
-             y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
-             # Right hand movement
-             x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
-             y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))

-             # Draw hands
-             cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
-             cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
-         else:
-             # 2D-style signing
-             # Drawing a simplified 2D signer
-             cv2.line(frame, (width//2, height//2-100), (width//2, height//2+50), (0, 0, 0), 3)  # Body
-             cv2.circle(frame, (width//2, height//2-120), 20, (0, 0, 0), 2)  # Head

-             # Animated hands for signing
-             t = i / frames
-             angle1 = t * 2 * np.pi
-             angle2 = t * 2 * np.pi + np.pi/2

-             # Left arm
-             x1 = int(width//2)
-             y1 = int(height//2 - 70)
-             x2 = int(x1 - 60 * np.cos(angle1))
-             y2 = int(y1 + 60 * np.sin(angle1))
-             cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)

-             # Right arm
-             x3 = int(width//2)
-             y3 = int(height//2 - 70)
-             x4 = int(x3 + 60 * np.cos(angle2))
-             y4 = int(y3 + 60 * np.sin(angle2))
-             cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)
-
-         # Add text with current sign
-         font = cv2.FONT_HERSHEY_SIMPLEX
-         cv2.putText(frame, text, (width//2-100, height-50), font, 1, (0, 0, 0), 2)
-         if language == "ar":
-             # Right-to-left indicator
-             cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (0, 0, 0), 1)

-         video.write(frame)

-     video.release()
-     return output_path

- def generate_sign_video(tokens, language, output_format="3D"):
-     """Generate sign language video for the given tokens"""
-     # For each token, either find a pre-recorded video or generate one
      temp_dir = tempfile.gettempdir()
-     output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

-     # In a real implementation, this would concatenate actual sign videos
-     # For this demo, we'll create a simple animation
-     if language in SIGN_DICT and tokens and tokens[0] in SIGN_DICT[language]:
-         # In a real implementation, this would load the video file
-         # For demo purposes, we'll create an animation
-         create_avatar_animation(tokens[0], output_path, language, output_format)
-     else:
-         # Generate a default video with text
-         if tokens:
-             create_avatar_animation(tokens[0], output_path, language, output_format)
          else:
-             create_avatar_animation("No tokens", output_path, language, output_format)

-     return output_path

- def translate_to_sign(text, output_format="3D"):
      """Main function to translate text to sign language video"""
      if not text:
          return None, ""

      # Detect the input language
      language = detect_language(text)
      if language == "unknown":
          return None, "Could not determine the language. Please use English or Arabic."

      try:
-         # Tokenize the text
-         tokens = tokenize_text(text, language)
          if not tokens:
-             return None, "No translatable tokens found."

          # Generate sign language video
-         video_path = generate_sign_video(tokens, language, output_format)

          # Prepare status message
-         if language == "en":
-             status = f"Translated English: \"{text}\" to sign language."
-         else:
-             status = f"Translated Arabic: \"{text}\" to sign language."

          return video_path, status

@@ -372,10 +441,10 @@ with gr.Blocks(title=TITLE) as demo:
              label="Text Input"
          )

-         format_dropdown = gr.Dropdown(
-             choices=["3D", "2D"],
-             value="3D",
-             label="Avatar Style"
          )

          with gr.Row():
@@ -388,7 +457,7 @@ with gr.Blocks(title=TITLE) as demo:
      with gr.Column():
          # Output video
          video_output = gr.Video(
-             label="Sign Language Output",
              format="mp4",
              autoplay=True,
              show_download_button=True
@@ -397,14 +466,14 @@ with gr.Blocks(title=TITLE) as demo:
      # Examples in both languages
      gr.Examples(
          examples=[
-             ["Hello, how can I help you today?", "3D"],
-             ["Please wait while I check your account.", "3D"],
-             ["Thank you for your patience.", "3D"],
-             ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
-             ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
-             ["شكرا لصبرك.", "3D"]
          ],
-         inputs=[text_input, format_dropdown],
          outputs=[video_output, status_output],
          fn=translate_to_sign
      )
@@ -412,7 +481,7 @@ with gr.Blocks(title=TITLE) as demo:
      # Event handlers
      translate_btn.click(
          fn=translate_to_sign,
-         inputs=[text_input, format_dropdown],
          outputs=[video_output, status_output]
      )

 
  import os
  import sys
  import gradio as gr
  import tempfile
  import uuid
+ import json
+ import requests
+ from pathlib import Path
+ import cv2
+ import numpy as np
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ import mediapipe as mp
+ import time

+ # Try to import translation library - fall back to a local method if not available
  try:
+     from deep_translator import GoogleTranslator
+     translation_available = True
  except ImportError:
+     translation_available = False

+ # Define the title and description
+ TITLE = "Complete Sign Language Translation System"
+ DESCRIPTION = """This application translates English and Arabic text into sign language using real sign language videos.
+ It translates Arabic to English when needed, then maps the English text to sign language videos.

  **Features:**
+ - Supports both English and Arabic input
+ - Uses real sign videos from SignDict and other sources
+ - MediaPipe integration for gesture recognition and hand tracking
+ - Automatic language detection and translation
  """

  # Initialize MediaPipe
  mp_hands = mp.solutions.hands
  mp_drawing = mp.solutions.drawing_utils
  mp_drawing_styles = mp.solutions.drawing_styles
+ mp_holistic = mp.solutions.holistic

+ # Define paths for sign language videos
+ VIDEO_ROOT = "sign_videos"
+ os.makedirs(VIDEO_ROOT, exist_ok=True)
+ os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
+ os.makedirs(f"{VIDEO_ROOT}/ar", exist_ok=True)
+
+ # Define video URLs - use these to download videos on first run
+ # In a real application, you would have a more extensive database
+ SIGN_VIDEOS_URLS = {
+     "hello": "https://media.signbsl.com/videos/bsl/signstation/mp4/hello.mp4",
+     "thank": "https://media.signbsl.com/videos/bsl/signstation/mp4/thank_you.mp4",
+     "yes": "https://media.signbsl.com/videos/bsl/signstation/mp4/yes.mp4",
+     "no": "https://media.signbsl.com/videos/bsl/signstation/mp4/no.mp4",
+     "please": "https://media.signbsl.com/videos/bsl/signstation/mp4/please.mp4",
+     "help": "https://media.signbsl.com/videos/bsl/signstation/mp4/help.mp4",
+ }
+
+ # Define mapping of words to video files
+ SIGN_DICT = {
+     "en": {
+         "hello": f"{VIDEO_ROOT}/en/hello.mp4",
+         "hi": f"{VIDEO_ROOT}/en/hello.mp4",  # Map to same video
+         "welcome": f"{VIDEO_ROOT}/en/welcome.mp4",
+         "thank": f"{VIDEO_ROOT}/en/thank.mp4",
+         "you": f"{VIDEO_ROOT}/en/you.mp4",
+         "thanks": f"{VIDEO_ROOT}/en/thank.mp4",  # Map to same video
+         "please": f"{VIDEO_ROOT}/en/please.mp4",
+         "wait": f"{VIDEO_ROOT}/en/wait.mp4",
+         "help": f"{VIDEO_ROOT}/en/help.mp4",
+         "yes": f"{VIDEO_ROOT}/en/yes.mp4",
+         "no": f"{VIDEO_ROOT}/en/no.mp4",
+         "how": f"{VIDEO_ROOT}/en/how.mp4",
+         "can": f"{VIDEO_ROOT}/en/can.mp4",
+         "i": f"{VIDEO_ROOT}/en/i.mp4",
+         "service": f"{VIDEO_ROOT}/en/service.mp4",
+         "customer": f"{VIDEO_ROOT}/en/customer.mp4",
+         "sorry": f"{VIDEO_ROOT}/en/sorry.mp4",
+     }
+ }
+
+ # Create a dictionary for English to Arabic translations and vice versa
  TRANSLATIONS = {
      "hello": "مرحبا",
+     "hi": "مرحبا",
      "welcome": "أهلا وسهلا",
      "thank you": "شكرا",
+     "thanks": "شكرا",
+     "please": "من فضلك",
+     "wait": "انتظر",
      "help": "مساعدة",
      "yes": "نعم",
      "no": "لا",
      "how can i help you": "كيف يمكنني مساعدتك",
      "customer": "عميل",
      "service": "خدمة",
+     "support": "دعم",
+     "sorry": "آسف",
  }

+ # Function to download videos if they don't exist
+ def download_sign_videos():
+     """Download sign language videos on first run"""
+     for word, url in SIGN_VIDEOS_URLS.items():
+         output_path = f"{VIDEO_ROOT}/en/{word}.mp4"
+         if not os.path.exists(output_path):
+             try:
+                 print(f"Downloading {word} sign video...")
+                 response = requests.get(url)
+                 if response.status_code == 200:
+                     with open(output_path, 'wb') as f:
+                         f.write(response.content)
+                     print(f"Downloaded {word} sign video")
+                 else:
+                     print(f"Failed to download {word} sign video: {response.status_code}")
+             except Exception as e:
+                 print(f"Error downloading {word} sign video: {e}")
+
+ # Function to create placeholder videos when real ones don't exist yet
118
+ def create_placeholder_video(text, output_path, language="en"):
119
+ """Create a placeholder video with text when a real video isn't available"""
120
+ height, width = 480, 640
121
+ fps = 30
122
+ seconds = 1.5
123
+
124
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
125
+ video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
126
+
127
+ font = cv2.FONT_HERSHEY_SIMPLEX
128
+ font_scale = 1
129
+ font_color = (255, 255, 255)
130
+ line_type = 2
131
+
132
+ # Text positioning
133
+ text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
134
+ text_x = (width - text_size[0]) // 2
135
+ text_y = (height + text_size[1]) // 2
136
+
137
+ for _ in range(int(fps * seconds)):
138
+ # Create a gradient background to look more professional
139
+ frame = np.zeros((height, width, 3), dtype=np.uint8)
140
+ for y in range(height):
141
+ color = int(50 + (y / height) * 100)
142
+ frame[y, :] = [color, color, color + 30]
143
+
144
+ # Add the word text
145
+ cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
146
+
147
+ # Add language indicator
148
+ lang_text = "English" if language == "en" else "Arabic"
149
+ cv2.putText(frame, lang_text, (width - 120, 30), font, 0.7, font_color, 1)
150
+
151
+ video.write(frame)
152
+
153
+ video.release()
154
+ return output_path
155
 
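The gradient background above is rebuilt row by row in Python on every frame, even though it never changes between frames. A small sketch (same dimensions and color ramp assumed) that builds it once with NumPy broadcasting, which the frame loop could then copy and draw text onto:

```python
import numpy as np

def make_gradient(height: int = 480, width: int = 640) -> np.ndarray:
    """Vertical gradient matching the per-row loop above, built once via broadcasting."""
    ramp = (50 + (np.arange(height) / height) * 100).astype(np.uint8)  # shape (H,)
    frame = np.empty((height, width, 3), dtype=np.uint8)
    frame[..., 0] = ramp[:, None]        # blue channel (OpenCV frames are BGR)
    frame[..., 1] = ramp[:, None]        # green channel
    frame[..., 2] = ramp[:, None] + 30   # red channel, shifted as in the original
    return frame
```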
  def detect_language(text):
+     """Detect if the text is primarily English or Arabic"""
      if not text:
          return "unknown"
+
      # Simple detection by character set
      arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
      english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

      else:
          return "unknown"

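The middle of `detect_language` (between the character-set definitions and the final `else`) is collapsed in this diff view as unchanged lines. A plausible reconstruction based on the surrounding context (an assumption, not the committed code) counts characters from each set and compares:

```python
def detect_language(text):
    """Sketch of the elided logic: count characters from each set and compare."""
    if not text:
        return "unknown"
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    arabic_count = sum(1 for ch in text if ch in arabic_chars)
    english_count = sum(1 for ch in text if ch in english_chars)
    if arabic_count > english_count:
        return "ar"
    elif english_count > arabic_count:
        return "en"
    else:
        return "unknown"
```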
+ def translate_arabic_to_english(text):
+     """Translate Arabic text to English"""
+     if not text:
+         return "", "No text to translate"
+
+     # Check for direct translations in our dictionary
+     for eng, ar in TRANSLATIONS.items():
+         if ar in text:
+             text = text.replace(ar, eng)
+
+     # Try to use deep_translator if available
+     if translation_available:
+         try:
+             translator = GoogleTranslator(source='ar', target='en')
+             translation = translator.translate(text)
+             return translation, f"Translated to English: {translation}"
+         except Exception as e:
+             print(f"Translation error: {e}")
+             return text, f"Error during translation: {e}"
      else:
+         # Fallback method - very basic word mapping
+         result = text
+         for en, ar in TRANSLATIONS.items():
+             result = result.replace(ar, en)
+         return result, "Used basic translation mapping"

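One design point worth noting: the dictionary pass rewrites known Arabic phrases to English *before* the text reaches `GoogleTranslator(source='ar', ...)`, so the online translator may receive mixed Arabic/English input; sending the original text would be the safer order. A hypothetical call illustrating the dictionary pass:

```python
# Hypothetical usage; with deep_translator installed, the online path runs afterwards.
english, status = translate_arabic_to_english("من فضلك انتظر")
# The dictionary pass alone already yields "please wait" here, since both
# "من فضلك" ("please") and "انتظر" ("wait") appear in TRANSLATIONS.
print(english, "|", status)
```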
+ def tokenize_text(text, language="en"):
+     """Split the text into tokens that can be matched to sign videos"""
+     # Convert to lowercase for English
+     if language == "en":
+         text = text.lower()
+
+     # Simple tokenization by splitting on spaces
+     tokens = text.split()
+
+     # Try to match multi-word phrases first (like "thank you")
+     result = []
+     i = 0
+     while i < len(tokens):
+         # Try 3-word phrases, then 2-word, then single words
+         matched = False
+         for j in range(min(3, len(tokens) - i), 0, -1):
+             phrase = " ".join(tokens[i:i+j])
+             if language in SIGN_DICT and phrase in SIGN_DICT[language]:
+                 result.append(phrase)
+                 i += j
+                 matched = True
+                 break
+
+         # If no match found, add the single token
+         if not matched:
+             result.append(tokens[i])
+             i += 1
+
+     return result

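Because `tokenize_text` splits on spaces only, punctuation stays attached to words: "Hello, how can I help you?" produces the token "hello,", which misses the "hello" entry in SIGN_DICT and falls through to a placeholder video. A small pre-cleaning step (a suggested refinement, not in this commit) fixes that:

```python
import string

def clean_tokens(text):
    """Lowercase, strip surrounding punctuation from each token, drop empties."""
    stripped = (tok.strip(string.punctuation) for tok in text.lower().split())
    return [tok for tok in stripped if tok]

# "hello," now matches the dictionary key "hello"
print(clean_tokens("Hello, how can I help you?"))
# ['hello', 'how', 'can', 'i', 'help', 'you']
```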
+ def analyze_sign_video(video_path):
+     """Use MediaPipe to analyze hand movements in a sign language video"""
+     try:
+         # Only process if the file exists
+         if not os.path.exists(video_path):
+             return None, "Video file not found"
+
+         cap = cv2.VideoCapture(video_path)
+         if not cap.isOpened():
+             return None, "Could not open video file"
+
+         # Get video properties
+         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+         # Initialize MediaPipe Holistic (includes hands, face, and pose)
+         with mp_holistic.Holistic(
+             min_detection_confidence=0.5,
+             min_tracking_confidence=0.5) as holistic:
+
+             # Output video with annotations
+             output_path = video_path.replace(".mp4", "_analyzed.mp4")
+             fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+             out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+             frame_count = 0
+             while cap.isOpened():
+                 success, image = cap.read()
+                 if not success:
+                     break
+
+                 # Convert image to RGB and process with MediaPipe
+                 image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                 results = holistic.process(image_rgb)
+
+                 # Draw landmarks
+                 annotated_image = image.copy()
+
+                 # Draw face landmarks
+                 if results.face_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.face_landmarks,
+                         mp_holistic.FACEMESH_CONTOURS,
+                         landmark_drawing_spec=None,
+                         connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
+
+                 # Draw pose landmarks
+                 if results.pose_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.pose_landmarks,
+                         mp_holistic.POSE_CONNECTIONS,
+                         landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+
+                 # Draw hand landmarks
+                 if results.left_hand_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.left_hand_landmarks,
+                         mp_hands.HAND_CONNECTIONS,
+                         mp_drawing_styles.get_default_hand_landmarks_style(),
+                         mp_drawing_styles.get_default_hand_connections_style())
+
+                 if results.right_hand_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.right_hand_landmarks,
+                         mp_hands.HAND_CONNECTIONS,
+                         mp_drawing_styles.get_default_hand_landmarks_style(),
+                         mp_drawing_styles.get_default_hand_connections_style())
+
+                 # Add progress indicator
+                 progress = frame_count / total_frames * 100
+                 cv2.putText(annotated_image, f"Processing: {progress:.1f}%",
+                             (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+                 # Write the frame to output video
+                 out.write(annotated_image)
+                 frame_count += 1
+
+         cap.release()
+         out.release()
+
+         return output_path, f"Analyzed video with MediaPipe. Processed {frame_count} frames."
+
+     except Exception as e:
+         print(f"Error analyzing video: {e}")
+         return None, f"Error analyzing video: {e}"

+ def create_sign_video(tokens, language="en", analyze=False):
+     """Create a sign language video by concatenating clips for each token"""
+     # Temporary directory for video processing
      temp_dir = tempfile.gettempdir()

+     # List to store video paths for each token
+     video_paths = []
+
+     # For each token, find or create a video
+     for token in tokens:
+         # Check if we have a real video for this token
+         if language in SIGN_DICT and token in SIGN_DICT[language]:
+             video_path = SIGN_DICT[language][token]
+             # If the video file doesn't exist, create a placeholder
+             if not os.path.exists(video_path):
+                 placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+                 create_placeholder_video(token, placeholder_path, language)
+                 video_paths.append(placeholder_path)
+             else:
+                 video_paths.append(video_path)
          else:
+             # Create a placeholder video for this token
+             placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+             create_placeholder_video(token, placeholder_path, language)
+             video_paths.append(placeholder_path)

+     # If no videos were created, return None
+     if not video_paths:
+         return None, "No videos were created"
+
+     # Concatenate all videos
+     output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
+
+     try:
+         # Using moviepy to concatenate videos
+         clips = [VideoFileClip(vp) for vp in video_paths if os.path.exists(vp)]
+         if clips:
+             final_clip = concatenate_videoclips(clips)
+             final_clip.write_videofile(output_path, codec="libx264", audio=False)
+
+             # Clean up the temporary clips
+             for clip in clips:
+                 clip.close()
+
+             # If analyze is True, use MediaPipe to analyze the video
+             if analyze:
+                 analyzed_path, analysis_msg = analyze_sign_video(output_path)
+                 if analyzed_path:
+                     return analyzed_path, analysis_msg
+
+             return output_path, f"Created sign language video with {len(clips)} clips"
+         else:
+             return None, "No valid video clips were found"
+     except Exception as e:
+         print(f"Error concatenating videos: {str(e)}")
+         # Fallback: return the first video if concatenation fails
+         if video_paths and os.path.exists(video_paths[0]):
+             return video_paths[0], "Failed to concatenate videos, returning single clip"
+         return None, f"Error creating video: {str(e)}"

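Since the concatenated list can mix downloaded sign clips with generated 640x480 placeholders, the clips may not share one resolution, and `concatenate_videoclips` defaults to `method="chain"`, which assumes they do. A sketch of the safer call (paths are placeholders for illustration):

```python
from moviepy.editor import VideoFileClip, concatenate_videoclips

# Hypothetical inputs; a real clip and a generated placeholder may differ in size.
clips = [VideoFileClip(p) for p in ["sign_videos/en/hello.mp4",
                                    "/tmp/help_placeholder.mp4"]]

# method="compose" centers each clip on a canvas sized to the largest clip,
# instead of assuming every clip shares one resolution (the "chain" default).
final = concatenate_videoclips(clips, method="compose")
final.write_videofile("combined.mp4", codec="libx264", audio=False)
```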
+ def translate_to_sign(text, analyze_video=False):
      """Main function to translate text to sign language video"""
      if not text:
          return None, ""

+     # Download videos if needed (first run)
+     download_sign_videos()
+
      # Detect the input language
      language = detect_language(text)
      if language == "unknown":
          return None, "Could not determine the language. Please use English or Arabic."

      try:
+         # If Arabic, translate to English first
+         if language == "ar":
+             english_text, translation_status = translate_arabic_to_english(text)
+             # Use English dictionary for sign videos
+             tokens = tokenize_text(english_text, "en")
+             language_for_signs = "en"
+
+             translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
+         else:
+             # Use English text directly
+             tokens = tokenize_text(text, "en")
+             language_for_signs = "en"
+             translation_info = ""
+
          if not tokens:
+             return None, translation_info + "No translatable tokens found."

          # Generate sign language video
+         video_path, video_status = create_sign_video(tokens, language_for_signs, analyze_video)
+
+         if not video_path:
+             return None, translation_info + "Failed to create sign language video."

          # Prepare status message
+         status = translation_info + video_status

          return video_path, status

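Taken together, the new pipeline is: detect language, translate Arabic to English if needed, tokenize, fetch or synthesize a clip per token, concatenate, and optionally annotate with MediaPipe. A minimal usage sketch outside the Gradio UI, assuming the functions above are importable:

```python
# Hypothetical standalone call; the Gradio button wires the same function.
video_path, status = translate_to_sign("Thank you for your patience.",
                                       analyze_video=False)
print(status)       # e.g. "Created sign language video with N clips"
print(video_path)   # path to the concatenated MP4 in the temp directory
```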
              label="Text Input"
          )

+         analyze_checkbox = gr.Checkbox(
+             label="Analyze with MediaPipe",
+             value=False,
+             info="Apply MediaPipe hand tracking to visualize sign gestures"
          )

          with gr.Row():

      with gr.Column():
          # Output video
          video_output = gr.Video(
+             label="Sign Language Video",
              format="mp4",
              autoplay=True,
              show_download_button=True

      # Examples in both languages
      gr.Examples(
          examples=[
+             ["Hello, how can I help you?", False],
+             ["Thank you for your patience.", False],
+             ["Yes, please wait.", True],  # With analysis
+             ["مرحبا", False],
+             ["شكرا", False],
+             ["نعم، من فضلك انتظر", True],  # With analysis
          ],
+         inputs=[text_input, analyze_checkbox],
          outputs=[video_output, status_output],
          fn=translate_to_sign
      )

      # Event handlers
      translate_btn.click(
          fn=translate_to_sign,
+         inputs=[text_input, analyze_checkbox],
          outputs=[video_output, status_output]
      )