Update app.py
app.py CHANGED

Old version (context plus removed lines; removals are prefixed with "-"):

@@ -1,89 +1,76 @@
import os
import sys
-import
import tempfile
import uuid
-import json
import requests
from pathlib import Path
-import cv2
-import numpy as np
-from moviepy.editor import VideoFileClip, concatenate_videoclips
-import mediapipe as mp
-import time

-#
try:
-
-    translation_available = True
except ImportError:
-

# Define the title and description
-TITLE = "
-DESCRIPTION = """This application translates English and Arabic text into sign language using
-It translates Arabic to English when needed, then maps the English text to sign language

**Features:**
- Supports both English and Arabic input
-- Uses
--
-- Automatic language detection and translation
"""

-# Initialize MediaPipe
-mp_hands = mp.solutions.hands
-mp_drawing = mp.solutions.drawing_utils
-mp_drawing_styles = mp.solutions.drawing_styles
-mp_holistic = mp.solutions.holistic
-
# Define paths for sign language videos
VIDEO_ROOT = "sign_videos"
os.makedirs(VIDEO_ROOT, exist_ok=True)
os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
-os.makedirs(f"{VIDEO_ROOT}/ar", exist_ok=True)
-
-# Define video URLs - use these to download videos on first run
-# In a real application, you would have a more extensive database
-SIGN_VIDEOS_URLS = {
-    "hello": "https://media.signbsl.com/videos/bsl/signstation/mp4/hello.mp4",
-    "thank": "https://media.signbsl.com/videos/bsl/signstation/mp4/thank_you.mp4",
-    "yes": "https://media.signbsl.com/videos/bsl/signstation/mp4/yes.mp4",
-    "no": "https://media.signbsl.com/videos/bsl/signstation/mp4/no.mp4",
-    "please": "https://media.signbsl.com/videos/bsl/signstation/mp4/please.mp4",
-    "help": "https://media.signbsl.com/videos/bsl/signstation/mp4/help.mp4",
-}

# Define mapping of words to video files
SIGN_DICT = {
    "en": {
        "hello": f"{VIDEO_ROOT}/en/hello.mp4",
-        "hi": f"{VIDEO_ROOT}/en/hello.mp4",  # Map to same video
-        "welcome": f"{VIDEO_ROOT}/en/welcome.mp4",
        "thank": f"{VIDEO_ROOT}/en/thank.mp4",
        "you": f"{VIDEO_ROOT}/en/you.mp4",
-        "thanks": f"{VIDEO_ROOT}/en/thank.mp4",  # Map to same video
        "please": f"{VIDEO_ROOT}/en/please.mp4",
        "wait": f"{VIDEO_ROOT}/en/wait.mp4",
        "help": f"{VIDEO_ROOT}/en/help.mp4",
        "yes": f"{VIDEO_ROOT}/en/yes.mp4",
        "no": f"{VIDEO_ROOT}/en/no.mp4",
-        "how": f"{VIDEO_ROOT}/en/how.mp4",
-        "can": f"{VIDEO_ROOT}/en/can.mp4",
-        "i": f"{VIDEO_ROOT}/en/i.mp4",
-        "service": f"{VIDEO_ROOT}/en/service.mp4",
-        "customer": f"{VIDEO_ROOT}/en/customer.mp4",
-        "sorry": f"{VIDEO_ROOT}/en/sorry.mp4",
    }
}

# Create a dictionary for English to Arabic translations and vice versa
TRANSLATIONS = {
    "hello": "مرحبا",
-    "hi": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
-    "thanks": "شكرا",
    "please": "من فضلك",
    "wait": "انتظر",
    "help": "مساعدة",
@@ -92,67 +79,9 @@ TRANSLATIONS = {
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
-    "support": "دعم",
    "sorry": "آسف",
}

-# Function to download videos if they don't exist
-def download_sign_videos():
-    """Download sign language videos on first run"""
-    for word, url in SIGN_VIDEOS_URLS.items():
-        output_path = f"{VIDEO_ROOT}/en/{word}.mp4"
-        if not os.path.exists(output_path):
-            try:
-                print(f"Downloading {word} sign video...")
-                response = requests.get(url)
-                if response.status_code == 200:
-                    with open(output_path, 'wb') as f:
-                        f.write(response.content)
-                    print(f"Downloaded {word} sign video")
-                else:
-                    print(f"Failed to download {word} sign video: {response.status_code}")
-            except Exception as e:
-                print(f"Error downloading {word} sign video: {e}")
-
-# Function to create placeholder videos when real ones don't exist yet
-def create_placeholder_video(text, output_path, language="en"):
-    """Create a placeholder video with text when a real video isn't available"""
-    height, width = 480, 640
-    fps = 30
-    seconds = 1.5
-
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    font_scale = 1
-    font_color = (255, 255, 255)
-    line_type = 2
-
-    # Text positioning
-    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-    text_x = (width - text_size[0]) // 2
-    text_y = (height + text_size[1]) // 2
-
-    for _ in range(int(fps * seconds)):
-        # Create a gradient background to look more professional
-        frame = np.zeros((height, width, 3), dtype=np.uint8)
-        for y in range(height):
-            color = int(50 + (y / height) * 100)
-            frame[y, :] = [color, color, color + 30]
-
-        # Add the word text
-        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
-
-        # Add language indicator
-        lang_text = "English" if language == "en" else "Arabic"
-        cv2.putText(frame, lang_text, (width - 120, 30), font, 0.7, font_color, 1)
-
-        video.write(frame)
-
-    video.release()
-    return output_path
-
def detect_language(text):
    """Detect if the text is primarily English or Arabic"""
    if not text:
@@ -173,221 +102,76 @@ def detect_language(text):
    return "unknown"

def translate_arabic_to_english(text):
-    """Translate Arabic text to English"""
    if not text:
        return "", "No text to translate"

-    #
-
-
-

-
-    if translation_available:
-        try:
-            translator = GoogleTranslator(source='ar', target='en')
-            translation = translator.translate(text)
-            return translation, f"Translated to English: {translation}"
-        except Exception as e:
-            print(f"Translation error: {e}")
-            return text, f"Error during translation: {e}"
-    else:
-        # Fallback method - very basic word mapping
-        result = text
-        for en, ar in TRANSLATIONS.items():
-            result = result.replace(ar, en)
-        return result, "Used basic translation mapping"

-def tokenize_text(text
-    """Split the text into tokens
    # Convert to lowercase for English
-
-        text = text.lower()

    # Simple tokenization by splitting on spaces
-
-
-    # Try to match multi-word phrases first (like "thank you")
-    result = []
-    i = 0
-    while i < len(tokens):
-        # Try 3-word phrases, then 2-word, then single words
-        matched = False
-        for j in range(min(3, len(tokens) - i), 0, -1):
-            phrase = " ".join(tokens[i:i+j])
-            if language in SIGN_DICT and phrase in SIGN_DICT[language]:
-                result.append(phrase)
-                i += j
-                matched = True
-                break
-
-        # If no match found, add the single token
-        if not matched:
-            result.append(tokens[i])
-            i += 1
-
-    return result

-def analyze_sign_video(video_path):
-    """
-
-    #
-
-
-
-    cap = cv2.VideoCapture(video_path)
-    if not cap.isOpened():
-        return None, "Could not open video file"
-
-    # Get video properties
-    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    # Initialize MediaPipe Holistic (includes hands, face, and pose)
-    with mp_holistic.Holistic(
-        min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as holistic:
-
-        # Output video with annotations
-        output_path = video_path.replace(".mp4", "_analyzed.mp4")
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-        frame_count = 0
-        while cap.isOpened():
-            success, image = cap.read()
-            if not success:
-                break
-
-            # Convert image to RGB and process with MediaPipe
-            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            results = holistic.process(image_rgb)
-
-            # Draw landmarks
-            annotated_image = image.copy()
-
-            # Draw face landmarks
-            if results.face_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.face_landmarks,
-                    mp_holistic.FACEMESH_CONTOURS,
-                    landmark_drawing_spec=None,
-                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
-
-            # Draw pose landmarks
-            if results.pose_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.pose_landmarks,
-                    mp_holistic.POSE_CONNECTIONS,
-                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
-
-            # Draw hand landmarks
-            if results.left_hand_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.left_hand_landmarks,
-                    mp_hands.HAND_CONNECTIONS,
-                    mp_drawing_styles.get_default_hand_landmarks_style(),
-                    mp_drawing_styles.get_default_hand_connections_style())
-
-            if results.right_hand_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.right_hand_landmarks,
-                    mp_hands.HAND_CONNECTIONS,
-                    mp_drawing_styles.get_default_hand_landmarks_style(),
-                    mp_drawing_styles.get_default_hand_connections_style())
-
-            # Add progress indicator
-            progress = frame_count / total_frames * 100
-            cv2.putText(annotated_image, f"Processing: {progress:.1f}%",
-                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-            # Write the frame to output video
-            out.write(annotated_image)
-            frame_count += 1
-
-        cap.release()
-        out.release()
-
-        return output_path, f"Analyzed video with MediaPipe. Processed {frame_count} frames."

-
-
-
-
-def create_sign_video(tokens, language="en", analyze=False):
-    """Create a sign language video by concatenating clips for each token"""
-    # Temporary directory for video processing
-    temp_dir = tempfile.gettempdir()

-
-

-
-
-
-
-                video_path = SIGN_DICT[language][token]
-                # If the video file doesn't exist, create a placeholder
-                if not os.path.exists(video_path):
-                    placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
-                    create_placeholder_video(token, placeholder_path, language)
-                    video_paths.append(placeholder_path)
-                else:
-                    video_paths.append(video_path)
-            else:
-                # Create a placeholder video for this token
-                placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
-                create_placeholder_video(token, placeholder_path, language)
-                video_paths.append(placeholder_path)

-    #
-
-

-
-

-
-
-        clips = [VideoFileClip(vp) for vp in video_paths if os.path.exists(vp)]
-        if clips:
-            final_clip = concatenate_videoclips(clips)
-            final_clip.write_videofile(output_path, codec="libx264", audio=False)
-
-            # Clean up the temporary clips
-            for clip in clips:
-                clip.close()
-
-            # If analyze is True, use MediaPipe to analyze the video
-            if analyze:
-                analyzed_path, analysis_msg = analyze_sign_video(output_path)
-                if analyzed_path:
-                    return analyzed_path, analysis_msg
-
-            return output_path, f"Created sign language video with {len(clips)} clips"
-        else:
-            return None, "No valid video clips were found"
-    except Exception as e:
-        print(f"Error concatenating videos: {str(e)}")
-        # Fallback: return the first video if concatenation fails
-        if video_paths and os.path.exists(video_paths[0]):
-            return video_paths[0], "Failed to concatenate videos, returning single clip"
-        return None, f"Error creating video: {str(e)}"

-def translate_to_sign(text, analyze_video=False):
-    """Main function to translate text to sign language
    if not text:
        return None, ""

-    # Download videos if needed (first run)
-    download_sign_videos()
-
    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
@@ -397,28 +181,30 @@ def translate_to_sign(text, analyze_video=False):
    # If Arabic, translate to English first
    if language == "ar":
        english_text, translation_status = translate_arabic_to_english(text)
-
-
-        language_for_signs = "en"
-
-        translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
    else:
-
-        tokens = tokenize_text(text, "en")
-        language_for_signs = "en"
        translation_info = ""

    if not tokens:
        return None, translation_info + "No translatable tokens found."

-    #
-

-
-

    # Prepare status message
    status = translation_info + video_status

    return video_path, status

@@ -441,12 +227,6 @@ with gr.Blocks(title=TITLE) as demo:
                label="Text Input"
            )

-            analyze_checkbox = gr.Checkbox(
-                label="Analyze with MediaPipe",
-                value=False,
-                info="Apply MediaPipe hand tracking to visualize sign gestures"
-            )
-
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
@@ -455,34 +235,33 @@ with gr.Blocks(title=TITLE) as demo:
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
-            # Output video
-
-                label="Sign Language
                format="mp4",
-                autoplay=True
-
-            )

    # Examples in both languages
    gr.Examples(
        examples=[
-            ["Hello, how can I help you?"
-            ["Thank you for your patience."
-            ["Yes, please wait."
-            ["مرحبا"
-            ["شكرا"
-            ["نعم، من فضلك انتظر"
        ],
-        inputs=[text_input
-        outputs=[
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
-        inputs=[text_input
-        outputs=[
    )

    clear_btn.click(
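
The substance of this commit is the removal of the heavy dependencies: the MediaPipe landmark drawing, the moviepy concatenation, the signbsl.com downloads, and the online translator are all dropped in favor of a self-contained fallback path in the new file below. For instance, the old translate_arabic_to_english preferred a live translator and only fell back to the phrase dictionary, while the new one keeps just the dictionary pass. A minimal sketch of that old ordering, assuming the deep-translator package (whose GoogleTranslator class the removed code calls) and a two-entry excerpt of the TRANSLATIONS dict; the helper name translate_ar_to_en is only illustrative:

from deep_translator import GoogleTranslator

TRANSLATIONS = {"hello": "مرحبا", "thank you": "شكرا"}  # excerpt of the dict defined above

def translate_ar_to_en(text):
    """Prefer the online translator; fall back to simple phrase replacement."""
    try:
        # deep-translator's GoogleTranslator: translate Arabic to English
        return GoogleTranslator(source="ar", target="en").translate(text)
    except Exception:
        # Offline fallback: replace known Arabic phrases with their English keys
        result = text
        for en, ar in TRANSLATIONS.items():
            result = result.replace(ar, en)
        return result

print(translate_ar_to_en("شكرا"))  # online result when reachable, otherwise "thank you"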

New version (context plus added lines; additions are prefixed with "+"):

@@ -1,89 +1,76 @@
import os
import sys
+import json
import tempfile
import uuid
import requests
from pathlib import Path

+# Auto-install required packages
+try:
+    import gradio as gr
+    import numpy as np
+except ImportError:
+    print("Installing required packages...")
+    os.system(f"{sys.executable} -m pip install gradio numpy requests")
+    import gradio as gr
+    import numpy as np
+
+# Try to import OpenCV - if not available, try to install it
try:
+    import cv2
except ImportError:
+    print("Installing OpenCV...")
+    os.system(f"{sys.executable} -m pip install opencv-python-headless")
+    try:
+        import cv2
+    except ImportError:
+        print("Failed to install OpenCV. Will use simple image processing only.")
+        cv2 = None
+
+# Try to import moviepy (for video concatenation) - if not available, we'll use a simpler approach
+try:
+    from moviepy.editor import VideoFileClip, concatenate_videoclips
+    moviepy_available = True
+except ImportError:
+    print("MoviePy not available. Will use simpler video processing.")
+    moviepy_available = False

# Define the title and description
+TITLE = "Simple Sign Language Translator"
+DESCRIPTION = """This application translates English and Arabic text into sign language using simple video generation.
+It translates Arabic to English when needed, then maps the English text to sign language representations.

**Features:**
- Supports both English and Arabic input
+- Uses simple visual representations of signs
+- Automatic language detection
"""

# Define paths for sign language videos
VIDEO_ROOT = "sign_videos"
os.makedirs(VIDEO_ROOT, exist_ok=True)
os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)

# Define mapping of words to video files
SIGN_DICT = {
    "en": {
        "hello": f"{VIDEO_ROOT}/en/hello.mp4",
        "thank": f"{VIDEO_ROOT}/en/thank.mp4",
        "you": f"{VIDEO_ROOT}/en/you.mp4",
        "please": f"{VIDEO_ROOT}/en/please.mp4",
        "wait": f"{VIDEO_ROOT}/en/wait.mp4",
        "help": f"{VIDEO_ROOT}/en/help.mp4",
        "yes": f"{VIDEO_ROOT}/en/yes.mp4",
        "no": f"{VIDEO_ROOT}/en/no.mp4",
    }
}

# Create a dictionary for English to Arabic translations and vice versa
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "please": "من فضلك",
    "wait": "انتظر",
    "help": "مساعدة",
@@ -92,67 +79,9 @@ TRANSLATIONS = {
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "sorry": "آسف",
}

def detect_language(text):
    """Detect if the text is primarily English or Arabic"""
    if not text:
@@ -173,221 +102,76 @@ def detect_language(text):
    return "unknown"

def translate_arabic_to_english(text):
+    """Translate Arabic text to English using dictionary lookup"""
    if not text:
        return "", "No text to translate"

+    # Very basic translation - look up Arabic phrases in our dictionary
+    result = text
+    for en, ar in TRANSLATIONS.items():
+        result = result.replace(ar, en)

+    return result, f"Translated to English: {result}"

+def tokenize_text(text):
+    """Split the text into tokens"""
    # Convert to lowercase for English
+    text = text.lower()

    # Simple tokenization by splitting on spaces
+    return text.split()

+def create_simple_sign_video(text, output_path):
+    """Create a simple video with text representation of sign language"""
+    if cv2 is None:
+        # If OpenCV is not available, create a very simple text file
+        with open(output_path.replace('.mp4', '.txt'), 'w') as f:
+            f.write(f"Sign representation for: {text}")
+        return output_path.replace('.mp4', '.txt'), "Created text representation (OpenCV not available)"

+    # If OpenCV is available, create a simple video
+    height, width = 480, 640
+    fps = 30
+    seconds = 2

+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1.5
+    font_color = (255, 255, 255)
+    line_type = 2

+    # Text positioning
+    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
+    text_x = (width - text_size[0]) // 2
+    text_y = (height + text_size[1]) // 2

+    for i in range(int(fps * seconds)):
+        # Create a gradient blue background
+        frame = np.zeros((height, width, 3), dtype=np.uint8)
+        for y in range(height):
+            blue_val = int(50 + (y / height) * 100)
+            frame[y, :] = [blue_val, 30, 20]  # BGR
+
+        # Make the text pulse slightly
+        pulse = 1.0 + 0.2 * np.sin(i * 0.2)
+        cv2.putText(frame, text, (text_x, text_y), font, font_scale * pulse, font_color, line_type)
+
+        # Add "SIGN LANGUAGE" text at bottom
+        cv2.putText(frame, "SIGN LANGUAGE", (width//2 - 100, height - 30),
+                    font, 0.7, (200, 200, 200), 1)
+
+        video.write(frame)

+    video.release()
+    return output_path, f"Created video representation for '{text}'"

+def translate_to_sign(text):
+    """Main function to translate text to sign language representation"""
    if not text:
        return None, ""

    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
@@ -397,28 +181,30 @@ def translate_to_sign(text, analyze_video=False):
    # If Arabic, translate to English first
    if language == "ar":
        english_text, translation_status = translate_arabic_to_english(text)
+        original_text = text
+        translation_info = f"Original Arabic: \"{original_text}\"\n{translation_status}\n"
    else:
+        english_text = text
        translation_info = ""

+    # Tokenize the text
+    tokens = tokenize_text(english_text)
    if not tokens:
        return None, translation_info + "No translatable tokens found."

+    # Create a temporary directory for the output
+    temp_dir = tempfile.gettempdir()
+    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

+    # Create a sign language video for the first token
+    # In a full implementation, you would create videos for all tokens and concatenate them
+    first_token = tokens[0] if tokens else "error"
+    video_path, video_status = create_simple_sign_video(first_token, output_path)

    # Prepare status message
    status = translation_info + video_status
+    if len(tokens) > 1:
+        status += f"\nNote: Only showing sign for first word. Full text: {english_text}"

    return video_path, status

@@ -441,12 +227,6 @@ with gr.Blocks(title=TITLE) as demo:
                label="Text Input"
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
@@ -455,34 +235,33 @@ with gr.Blocks(title=TITLE) as demo:
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
+            # Output (video or text, depending on what's available)
+            output_display = gr.Video(
+                label="Sign Language Output",
                format="mp4",
+                autoplay=True
+            ) if cv2 is not None else gr.Textbox(label="Sign Representation", lines=3)

    # Examples in both languages
    gr.Examples(
        examples=[
+            ["Hello, how can I help you?"],
+            ["Thank you for your patience."],
+            ["Yes, please wait."],
+            ["مرحبا"],
+            ["شكرا"],
+            ["نعم، من فضلك انتظر"],
        ],
+        inputs=[text_input],
+        outputs=[output_display, status_output],
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
+        inputs=[text_input],
+        outputs=[output_display, status_output]
    )

    clear_btn.click(
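
The diff stops at clear_btn.click(, so the rest of the event wiring is not shown. A minimal way to exercise the new pipeline outside Gradio, assuming the new file above is saved as app.py in the working directory (importing it runs its top-level setup, including the install fallbacks and the Blocks definition), and using only the functions defined in the diff:

# Smoke test for the new app.py; expected values follow the dictionaries defined above.
from app import detect_language, translate_arabic_to_english, tokenize_text, translate_to_sign

print(detect_language("شكرا"))                       # expected: "ar"
print(translate_arabic_to_english("شكرا"))           # expected: ("thank you", "Translated to English: thank you")
print(tokenize_text("Hello, how can I help you?"))   # lowercased, whitespace-split tokens (punctuation kept)

# Full pipeline: returns (path to an .mp4, or a .txt if OpenCV is unavailable, and a status string)
video_path, status = translate_to_sign("مرحبا")
print(video_path)
print(status)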