Spaces: Update app.py

app.py CHANGED
@@ -1,117 +1,163 @@
 import os
 import sys
 import gradio as gr
-import requests
-import json
-from datetime import datetime
 import tempfile
 import uuid
-import …
 
-# …
 try:
-    import mediapipe as mp
-    import cv2
-    import numpy as np
 except ImportError:
-    …
-    os.system("pip install mediapipe opencv-python numpy --quiet")
-    import mediapipe as mp
-    import cv2
-    import numpy as np
 
-[… removed lines truncated in the diff view …]
 
 **Features:**
-- Supports both English and Arabic
-- Uses …
-- …
 """
 
 # Initialize MediaPipe
 mp_hands = mp.solutions.hands
 mp_drawing = mp.solutions.drawing_utils
 mp_drawing_styles = mp.solutions.drawing_styles
-[… removed line truncated in the diff view …]
 
-# …
 TRANSLATIONS = {
     "hello": "مرحبا",
     "welcome": "أهلا وسهلا",
     "thank you": "شكرا",
     "help": "مساعدة",
     "yes": "نعم",
     "no": "لا",
-    "please": "من فضلك",
-    "wait": "انتظر",
-    "sorry": "آسف",
     "how can i help you": "كيف يمكنني مساعدتك",
     "customer": "عميل",
     "service": "خدمة",
-    "support": "دعم",
-    "…
-    "question": "سؤال",
-    "answer": "إجابة",
 }
 
-# …
-[… removed lines truncated in the diff view …]
 
 
 def detect_language(text):
-    """Detect if text is primarily English or Arabic"""
     if not text:
         return "unknown"
-
     # Simple detection by character set
     arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
     english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
@@ -126,230 +172,253 @@ def detect_language(text):
 
     else:
         return "unknown"
 
-def …
-    """…
-    if …
-        return text
-
-    # Convert to lowercase for matching
-    text_lower = text.lower()
-
-    # For English to Arabic
-    if source_lang == "en" and target_lang == "ar":
-        for eng, ar in TRANSLATIONS.items():
-            text_lower = text_lower.replace(eng, ar)
-        return text_lower
 
-    # …
-[… removed lines truncated in the diff view …]
-    return text_lower
 
-[… removed lines truncated in the diff view …]
-        i = 0
-        while i < len(tokens):
-            # Try to match longest phrases first
-            matched = False
-            for j in range(min(3, len(tokens) - i), 0, -1):
-                phrase = " ".join(tokens[i:i+j])
-                if phrase in SIGN_DICT[language]:
-                    phrases.append(phrase)
-                    i += j
-                    matched = True
-                    break
-            if not matched:
-                phrases.append(tokens[i])
-                i += 1
-        return phrases
     else:
-        # …
-[… removed lines truncated in the diff view …]
-        matched = False
-        for j in range(min(3, len(tokens) - i), 0, -1):
-            phrase = " ".join(tokens[i:i+j])
-            if phrase in SIGN_DICT[language]:
-                phrases.append(phrase)
-                i += j
-                matched = True
-                break
-        if not matched:
-            phrases.append(tokens[i])
-            i += 1
-        return phrases
 
-def …
-    """…
-    # …
-[… removed lines truncated in the diff view …]
-    seconds = 2
 
-    # …
-[… removed line truncated in the diff view …]
-    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-    # Create frames with text
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    font_scale = 1
-    font_color = (255, 255, 255)  # White
-    line_type = 2
-
-    # Text positioning
-    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-    text_x = (width - text_size[0]) // 2
-    text_y = (height + text_size[1]) // 2
 
-    # …
-[… removed lines truncated in the diff view …]
 
-[… removed line truncated in the diff view …]
-    return output_path
 
-def …
-    """…
-[… removed lines truncated in the diff view …]
-    # Create video writer
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-    # Create a simple animation with hands
-    frames = fps * duration
-    for i in range(frames):
-        # Create a background based on style
-        if style == "3D":
-            # Create a gradient background
-            frame = np.zeros((height, width, 3), dtype=np.uint8)
-            for y in range(height):
-                for x in range(width):
-                    frame[y, x] = [
-                        int(100 + 50 * (x / width)),
-                        int(60 + 30 * (y / height)),
-                        int(120 + 40 * ((x+y) / (width+height)))
-                    ]
-        else:
-            # Simple solid background for 2D
-            frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([240, 240, 240], dtype=np.uint8)
 
-        if …
-[… removed lines truncated in the diff view …]
-            x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
-            y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
 
-            # …
-            cv2.…
-[… removed lines truncated in the diff view …]
-            # 2D-style signing
-            # Drawing a simplified 2D signer
-            cv2.line(frame, (width//2, height//2-100), (width//2, height//2+50), (0, 0, 0), 3)  # Body
-            cv2.circle(frame, (width//2, height//2-120), 20, (0, 0, 0), 2)  # Head
 
-[… removed lines truncated in the diff view …]
-            y1 = int(height//2 - 70)
-            x2 = int(x1 - 60 * np.cos(angle1))
-            y2 = int(y1 + 60 * np.sin(angle1))
-            cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)
 
-            x3 = int(width//2)
-            y3 = int(height//2 - 70)
-            x4 = int(x3 + 60 * np.cos(angle2))
-            y4 = int(y3 + 60 * np.sin(angle2))
-            cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)
-
-        # Add text with current sign
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        cv2.putText(frame, text, (width//2-100, height-50), font, 1, (0, 0, 0), 2)
-        if language == "ar":
-            # Right-to-left indicator
-            cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (0, 0, 0), 1)
-
-        video.write(frame)
 
-[… removed lines truncated in the diff view …]
 
 
-def …
-    """…
-    # …
     temp_dir = tempfile.gettempdir()
-    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
 
-    # …
-[… removed lines truncated in the diff view …]
     else:
-[… removed line truncated in the diff view …]
 
-    return …
 
-def translate_to_sign(text, …):
     """Main function to translate text to sign language video"""
     if not text:
         return None, ""
 
     # Detect the input language
     language = detect_language(text)
     if language == "unknown":
         return None, "Could not determine the language. Please use English or Arabic."
 
     try:
-        # …
-[… removed lines truncated in the diff view …]
         if not tokens:
-            return None, "No translatable tokens found."
 
         # Generate sign language video
-        video_path = …
 
         # Prepare status message
-        if language == "en":
-            status = f"Translated English: \"{text}\" to sign language."
-        else:
-            status = f"Translated Arabic: \"{text}\" to sign language."
 
         return video_path, status
 
@@ -372,10 +441,10 @@ with gr.Blocks(title=TITLE) as demo:
             label="Text Input"
         )
 
-[… removed lines truncated in the diff view …]
-            value=…
         )
 
         with gr.Row():
@@ -388,7 +457,7 @@ with gr.Blocks(title=TITLE) as demo:
         with gr.Column():
             # Output video
             video_output = gr.Video(
-                label="Sign Language …
                 format="mp4",
                 autoplay=True,
                 show_download_button=True
@@ -397,14 +466,14 @@ with gr.Blocks(title=TITLE) as demo:
     # Examples in both languages
     gr.Examples(
         examples=[
-            ["Hello, how can I help you …
-            ["…
-            ["…
-            ["…
-            ["…
-            ["…
         ],
-        inputs=[text_input, …
         outputs=[video_output, status_output],
         fn=translate_to_sign
     )
@@ -412,7 +481,7 @@ with gr.Blocks(title=TITLE) as demo:
     # Event handlers
     translate_btn.click(
         fn=translate_to_sign,
-        inputs=[text_input, …
         outputs=[video_output, status_output]
     )
 

Updated app.py:

 import os
 import sys
 import gradio as gr
 import tempfile
 import uuid
+import json
+import requests
+from pathlib import Path
+import cv2
+import numpy as np
+from moviepy.editor import VideoFileClip, concatenate_videoclips
+import mediapipe as mp
+import time
 
+# Try to import the translation library - fall back to a local method if not available
 try:
+    from deep_translator import GoogleTranslator
+    translation_available = True
 except ImportError:
+    translation_available = False
 
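
A plausible requirements.txt for this Space, inferred from the import list above (package names are the usual PyPI equivalents; the moviepy pin is an assumption to keep the moviepy.editor import path, which MoviePy 2.x removed; deep-translator is optional because the code falls back without it):

    gradio
    requests
    opencv-python
    numpy
    mediapipe
    moviepy<2.0
    deep-translator
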
+# Define the title and description
+TITLE = "Complete Sign Language Translation System"
+DESCRIPTION = """This application translates English and Arabic text into sign language using real sign language videos.
+It translates Arabic to English when needed, then maps the English text to sign language videos.
 
 **Features:**
+- Supports both English and Arabic input
+- Uses real sign videos from SignDict and other sources
+- MediaPipe integration for gesture recognition and hand tracking
+- Automatic language detection and translation
 """
 
 # Initialize MediaPipe
 mp_hands = mp.solutions.hands
 mp_drawing = mp.solutions.drawing_utils
 mp_drawing_styles = mp.solutions.drawing_styles
+mp_holistic = mp.solutions.holistic
 
+# Define paths for sign language videos
+VIDEO_ROOT = "sign_videos"
+os.makedirs(VIDEO_ROOT, exist_ok=True)
+os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
+os.makedirs(f"{VIDEO_ROOT}/ar", exist_ok=True)
+
+
# Define video URLs - use these to download videos on first run
|
47 |
+
# In a real application, you would have a more extensive database
|
48 |
+
SIGN_VIDEOS_URLS = {
|
49 |
+
"hello": "https://media.signbsl.com/videos/bsl/signstation/mp4/hello.mp4",
|
50 |
+
"thank": "https://media.signbsl.com/videos/bsl/signstation/mp4/thank_you.mp4",
|
51 |
+
"yes": "https://media.signbsl.com/videos/bsl/signstation/mp4/yes.mp4",
|
52 |
+
"no": "https://media.signbsl.com/videos/bsl/signstation/mp4/no.mp4",
|
53 |
+
"please": "https://media.signbsl.com/videos/bsl/signstation/mp4/please.mp4",
|
54 |
+
"help": "https://media.signbsl.com/videos/bsl/signstation/mp4/help.mp4",
|
55 |
+
}
|
56 |
+
|
57 |
+
# Define mapping of words to video files
|
58 |
+
SIGN_DICT = {
|
59 |
+
"en": {
|
60 |
+
"hello": f"{VIDEO_ROOT}/en/hello.mp4",
|
61 |
+
"hi": f"{VIDEO_ROOT}/en/hello.mp4", # Map to same video
|
62 |
+
"welcome": f"{VIDEO_ROOT}/en/welcome.mp4",
|
63 |
+
"thank": f"{VIDEO_ROOT}/en/thank.mp4",
|
64 |
+
"you": f"{VIDEO_ROOT}/en/you.mp4",
|
65 |
+
"thanks": f"{VIDEO_ROOT}/en/thank.mp4", # Map to same video
|
66 |
+
"please": f"{VIDEO_ROOT}/en/please.mp4",
|
67 |
+
"wait": f"{VIDEO_ROOT}/en/wait.mp4",
|
68 |
+
"help": f"{VIDEO_ROOT}/en/help.mp4",
|
69 |
+
"yes": f"{VIDEO_ROOT}/en/yes.mp4",
|
70 |
+
"no": f"{VIDEO_ROOT}/en/no.mp4",
|
71 |
+
"how": f"{VIDEO_ROOT}/en/how.mp4",
|
72 |
+
"can": f"{VIDEO_ROOT}/en/can.mp4",
|
73 |
+
"i": f"{VIDEO_ROOT}/en/i.mp4",
|
74 |
+
"service": f"{VIDEO_ROOT}/en/service.mp4",
|
75 |
+
"customer": f"{VIDEO_ROOT}/en/customer.mp4",
|
76 |
+
"sorry": f"{VIDEO_ROOT}/en/sorry.mp4",
|
77 |
+
}
|
78 |
+
}
|
79 |
+
|
+# Create a dictionary for English to Arabic translations and vice versa
 TRANSLATIONS = {
     "hello": "مرحبا",
+    "hi": "مرحبا",
     "welcome": "أهلا وسهلا",
     "thank you": "شكرا",
+    "thanks": "شكرا",
+    "please": "من فضلك",
+    "wait": "انتظر",
     "help": "مساعدة",
     "yes": "نعم",
     "no": "لا",
     "how can i help you": "كيف يمكنني مساعدتك",
     "customer": "عميل",
     "service": "خدمة",
+    "support": "دعم",
+    "sorry": "آسف",
 }
 
+# Function to download videos if they don't exist
+def download_sign_videos():
+    """Download sign language videos on first run"""
+    for word, url in SIGN_VIDEOS_URLS.items():
+        output_path = f"{VIDEO_ROOT}/en/{word}.mp4"
+        if not os.path.exists(output_path):
+            try:
+                print(f"Downloading {word} sign video...")
+                response = requests.get(url)
+                if response.status_code == 200:
+                    with open(output_path, 'wb') as f:
+                        f.write(response.content)
+                    print(f"Downloaded {word} sign video")
+                else:
+                    print(f"Failed to download {word} sign video: {response.status_code}")
+            except Exception as e:
+                print(f"Error downloading {word} sign video: {e}")
+
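
One robustness note on the downloader: requests.get is called with no timeout, so a stalled host can hang the Space's first run indefinitely. A minimal hardened variant of that call (the 30-second value is an arbitrary choice, not from the diff):

    response = requests.get(url, timeout=30)  # fail fast instead of hanging startup
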
+# Function to create placeholder videos when real ones don't exist yet
+def create_placeholder_video(text, output_path, language="en"):
+    """Create a placeholder video with text when a real video isn't available"""
+    height, width = 480, 640
+    fps = 30
+    seconds = 1.5
+
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1
+    font_color = (255, 255, 255)
+    line_type = 2
+
+    # Text positioning
+    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
+    text_x = (width - text_size[0]) // 2
+    text_y = (height + text_size[1]) // 2
+
+    for _ in range(int(fps * seconds)):
+        # Create a gradient background to look more professional
+        frame = np.zeros((height, width, 3), dtype=np.uint8)
+        for y in range(height):
+            color = int(50 + (y / height) * 100)
+            frame[y, :] = [color, color, color + 30]
+
+        # Add the word text
+        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
+
+        # Add language indicator
+        lang_text = "English" if language == "en" else "Arabic"
+        cv2.putText(frame, lang_text, (width - 120, 30), font, 0.7, font_color, 1)
+
+        video.write(frame)
+
+    video.release()
+    return output_path
 
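
A quick smoke test for the placeholder generator, assuming it runs in the same module namespace; with fps = 30 and seconds = 1.5 it writes a 45-frame clip:

    import os, tempfile

    demo_path = os.path.join(tempfile.gettempdir(), "placeholder_demo.mp4")
    create_placeholder_video("hello", demo_path, language="en")
    print(os.path.exists(demo_path))  # expected: True
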
 def detect_language(text):
+    """Detect if the text is primarily English or Arabic"""
     if not text:
         return "unknown"
+
     # Simple detection by character set
     arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
     english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
[… unchanged lines omitted …]
     else:
         return "unknown"
 
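
Expected behavior of the detector (the character-counting middle of the function is collapsed above, so this assumes it picks whichever script dominates):

    print(detect_language("hello"))   # "en"
    print(detect_language("مرحبا"))   # "ar"
    print(detect_language(""))        # "unknown"
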
+def translate_arabic_to_english(text):
+    """Translate Arabic text to English"""
+    if not text:
+        return "", "No text to translate"
+
+    # Check for direct translations in our dictionary
+    for eng, ar in TRANSLATIONS.items():
+        if ar in text:
+            text = text.replace(ar, eng)
+
+    # Try to use deep_translator if available
+    if translation_available:
+        try:
+            translator = GoogleTranslator(source='ar', target='en')
+            translation = translator.translate(text)
+            return translation, f"Translated to English: {translation}"
+        except Exception as e:
+            print(f"Translation error: {e}")
+            return text, f"Error during translation: {e}"
+    else:
+        # Fallback method - very basic word mapping
+        result = text
+        for en, ar in TRANSLATIONS.items():
+            result = result.replace(ar, en)
+        return result, "Used basic translation mapping"
 
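
Note that the dictionary pass runs before GoogleTranslator, so the phrases in TRANSLATIONS resolve even when deep_translator is not installed:

    english, status = translate_arabic_to_english("شكرا")
    print(english)  # "thank you", via the TRANSLATIONS mapping
    print(status)   # reports either the Google translation or the basic-mapping fallback
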
+def tokenize_text(text, language="en"):
+    """Split the text into tokens that can be matched to sign videos"""
+    # Convert to lowercase for English
+    if language == "en":
+        text = text.lower()
+
+    # Simple tokenization by splitting on spaces
+    tokens = text.split()
+
+    # Try to match multi-word phrases first (like "thank you")
+    result = []
+    i = 0
+    while i < len(tokens):
+        # Try 3-word phrases, then 2-word, then single words
+        matched = False
+        for j in range(min(3, len(tokens) - i), 0, -1):
+            phrase = " ".join(tokens[i:i+j])
+            if language in SIGN_DICT and phrase in SIGN_DICT[language]:
+                result.append(phrase)
+                i += j
+                matched = True
+                break
+
+        # If no match found, add the single token
+        if not matched:
+            result.append(tokens[i])
+            i += 1
+
+    return result
 
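
One caveat: tokenize_text lowercases and splits on whitespace but never strips punctuation, so an input like "Hello, how can I help you?" leaves "hello," and "you?" unmatched in SIGN_DICT, and they fall through to placeholder clips. A minimal pre-cleaning sketch (this helper is not part of the diff):

    import re

    def strip_punctuation(text):
        # Keep letters, digits, and whitespace so "Hello," can match the "hello" entry.
        return re.sub(r"[^\w\s]", "", text)

    tokens = tokenize_text(strip_punctuation("Hello, how can I help you?"), "en")
    print(tokens)  # ['hello', 'how', 'can', 'i', 'help', 'you']
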
+def analyze_sign_video(video_path):
+    """Use MediaPipe to analyze hand movements in a sign language video"""
+    try:
+        # Only process if the file exists
+        if not os.path.exists(video_path):
+            return None, "Video file not found"
+
+        cap = cv2.VideoCapture(video_path)
+        if not cap.isOpened():
+            return None, "Could not open video file"
+
+        # Get video properties
+        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        # Initialize MediaPipe Holistic (includes hands, face, and pose)
+        with mp_holistic.Holistic(
+                min_detection_confidence=0.5,
+                min_tracking_confidence=0.5) as holistic:
+
+            # Output video with annotations
+            output_path = video_path.replace(".mp4", "_analyzed.mp4")
+            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+            frame_count = 0
+            while cap.isOpened():
+                success, image = cap.read()
+                if not success:
+                    break
+
+                # Convert image to RGB and process with MediaPipe
+                image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                results = holistic.process(image_rgb)
+
+                # Draw landmarks
+                annotated_image = image.copy()
+
+                # Draw face landmarks
+                if results.face_landmarks:
+                    mp_drawing.draw_landmarks(
+                        annotated_image,
+                        results.face_landmarks,
+                        mp_holistic.FACEMESH_CONTOURS,
+                        landmark_drawing_spec=None,
+                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
+
+                # Draw pose landmarks
+                if results.pose_landmarks:
+                    mp_drawing.draw_landmarks(
+                        annotated_image,
+                        results.pose_landmarks,
+                        mp_holistic.POSE_CONNECTIONS,
+                        landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+
+                # Draw hand landmarks
+                if results.left_hand_landmarks:
+                    mp_drawing.draw_landmarks(
+                        annotated_image,
+                        results.left_hand_landmarks,
+                        mp_hands.HAND_CONNECTIONS,
+                        mp_drawing_styles.get_default_hand_landmarks_style(),
+                        mp_drawing_styles.get_default_hand_connections_style())
+
+                if results.right_hand_landmarks:
+                    mp_drawing.draw_landmarks(
+                        annotated_image,
+                        results.right_hand_landmarks,
+                        mp_hands.HAND_CONNECTIONS,
+                        mp_drawing_styles.get_default_hand_landmarks_style(),
+                        mp_drawing_styles.get_default_hand_connections_style())
+
+                # Add progress indicator
+                progress = frame_count / total_frames * 100
+                cv2.putText(annotated_image, f"Processing: {progress:.1f}%",
+                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+                # Write the frame to output video
+                out.write(annotated_image)
+                frame_count += 1
+
+        cap.release()
+        out.release()
+
+        return output_path, f"Analyzed video with MediaPipe. Processed {frame_count} frames."
+
+    except Exception as e:
+        print(f"Error analyzing video: {e}")
+        return None, f"Error analyzing video: {e}"
 
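
A minor guard worth considering here: CAP_PROP_FRAME_COUNT can report 0 for some streams and containers, which would make the progress line divide by zero. A defensive variant of that computation:

    progress = (frame_count / total_frames * 100) if total_frames > 0 else 0.0
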
+def create_sign_video(tokens, language="en", analyze=False):
+    """Create a sign language video by concatenating clips for each token"""
+    # Temporary directory for video processing
     temp_dir = tempfile.gettempdir()
+
+    # List to store video paths for each token
+    video_paths = []
+
+    # For each token, find or create a video
+    for token in tokens:
+        # Check if we have a real video for this token
+        if language in SIGN_DICT and token in SIGN_DICT[language]:
+            video_path = SIGN_DICT[language][token]
+            # If the video file doesn't exist, create a placeholder
+            if not os.path.exists(video_path):
+                placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+                create_placeholder_video(token, placeholder_path, language)
+                video_paths.append(placeholder_path)
+            else:
+                video_paths.append(video_path)
         else:
+            # Create a placeholder video for this token
+            placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+            create_placeholder_video(token, placeholder_path, language)
+            video_paths.append(placeholder_path)
 
+    # If no videos were created, return None
+    if not video_paths:
+        return None, "No videos were created"
+
+    # Concatenate all videos
+    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
+
+    try:
+        # Using moviepy to concatenate videos
+        clips = [VideoFileClip(vp) for vp in video_paths if os.path.exists(vp)]
+        if clips:
+            final_clip = concatenate_videoclips(clips)
+            final_clip.write_videofile(output_path, codec="libx264", audio=False)
+
+            # Clean up the temporary clips
+            for clip in clips:
+                clip.close()
+
+            # If analyze is True, use MediaPipe to analyze the video
+            if analyze:
+                analyzed_path, analysis_msg = analyze_sign_video(output_path)
+                if analyzed_path:
+                    return analyzed_path, analysis_msg
+
+            return output_path, f"Created sign language video with {len(clips)} clips"
+        else:
+            return None, "No valid video clips were found"
+    except Exception as e:
+        print(f"Error concatenating videos: {str(e)}")
+        # Fallback: return the first video if concatenation fails
+        if video_paths and os.path.exists(video_paths[0]):
+            return video_paths[0], "Failed to concatenate videos, returning single clip"
+        return None, f"Error creating video: {str(e)}"
 
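
The builder can also be exercised directly, independent of the UI. With the default dictionary, "hello" resolves to sign_videos/en/hello.mp4 when that file exists (create_sign_video does not download by itself), and unknown words such as "world" get text placeholders:

    path, message = create_sign_video(["hello", "world"], language="en", analyze=False)
    print(message)  # e.g. "Created sign language video with 2 clips"
    print(path)     # temp-directory .mp4 written by moviepy
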
+def translate_to_sign(text, analyze_video=False):
     """Main function to translate text to sign language video"""
     if not text:
         return None, ""
 
+    # Download videos if needed (first run)
+    download_sign_videos()
+
     # Detect the input language
     language = detect_language(text)
     if language == "unknown":
         return None, "Could not determine the language. Please use English or Arabic."
 
     try:
+        # If Arabic, translate to English first
+        if language == "ar":
+            english_text, translation_status = translate_arabic_to_english(text)
+            # Use English dictionary for sign videos
+            tokens = tokenize_text(english_text, "en")
+            language_for_signs = "en"
+
+            translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
+        else:
+            # Use English text directly
+            tokens = tokenize_text(text, "en")
+            language_for_signs = "en"
+            translation_info = ""
+
         if not tokens:
+            return None, translation_info + "No translatable tokens found."
 
         # Generate sign language video
+        video_path, video_status = create_sign_video(tokens, language_for_signs, analyze_video)
+
+        if not video_path:
+            return None, translation_info + "Failed to create sign language video."
 
         # Prepare status message
+        status = translation_info + video_status
 
         return video_path, status
 
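
An end-to-end check without Gradio; translate_to_sign triggers the first-run downloads itself and returns a video path plus a status string:

    video_path, status = translate_to_sign("yes please wait", analyze_video=False)
    print(status)      # e.g. "Created sign language video with 3 clips"
    print(video_path)  # path to the concatenated .mp4
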
[… unchanged lines omitted …]
             label="Text Input"
         )
 
+        analyze_checkbox = gr.Checkbox(
+            label="Analyze with MediaPipe",
+            value=False,
+            info="Apply MediaPipe hand tracking to visualize sign gestures"
         )
 
         with gr.Row():
[… unchanged lines omitted …]
         with gr.Column():
             # Output video
             video_output = gr.Video(
+                label="Sign Language Video",
                 format="mp4",
                 autoplay=True,
                 show_download_button=True
[… unchanged lines omitted …]
     # Examples in both languages
     gr.Examples(
         examples=[
+            ["Hello, how can I help you?", False],
+            ["Thank you for your patience.", False],
+            ["Yes, please wait.", True],  # With analysis
+            ["مرحبا", False],
+            ["شكرا", False],
+            ["نعم، من فضلك انتظر", True],  # With analysis
         ],
+        inputs=[text_input, analyze_checkbox],
         outputs=[video_output, status_output],
         fn=translate_to_sign
     )
[… unchanged lines omitted …]
     # Event handlers
     translate_btn.click(
         fn=translate_to_sign,
+        inputs=[text_input, analyze_checkbox],
         outputs=[video_output, status_output]
     )
 