Spaces:
Sleeping
Sleeping
File size: 14,828 Bytes
80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 80bb9b6 1ceb289 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 |
import os
import sys
import gradio as gr
import requests
import json
from datetime import datetime
import tempfile
import uuid
import re
# Install required packages at runtime if they are missing.
# NOTE(review): runtime pip installs are fragile on Spaces; prefer pinning
# these in requirements.txt. Kept for parity with the original behavior.
try:
    import mediapipe as mp
    import cv2
    import numpy as np
except ImportError:
    print("Installing required packages...")
    # os.system ignores failures and may invoke a different interpreter's
    # pip; run pip as a module of the current interpreter and fail loudly.
    import subprocess
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install",
         "mediapipe", "opencv-python", "numpy", "--quiet"]
    )
    import mediapipe as mp
    import cv2
    import numpy as np
# UI copy rendered at the top of the Gradio page (DESCRIPTION is Markdown).
TITLE = "Multilingual Sign Language Customer Assistant"
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
The system automatically detects the input language and generates appropriate sign language visuals.
**Features:**
- Supports both English and Arabic text
- Uses 3D avatar technology to generate sign language
- Perfect for customer service and assistance scenarios
"""
# MediaPipe solution shortcuts (hand/pose landmark models + drawing helpers).
# NOTE(review): these are never referenced by the demo animation code below —
# presumably reserved for future landmark-driven signing; confirm before removal.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# English -> Arabic lookup for common customer-service words and phrases.
# translate_text uses the items as-is for en->ar and inverted for ar->en.
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "help": "مساعدة",
    "yes": "نعم",
    "no": "لا",
    "please": "من فضلك",
    "wait": "انتظر",
    "sorry": "آسف",
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "support": "دعم",
    "information": "معلومات",
    "question": "سؤال",
    "answer": "إجابة",
}
# SignDict - dictionary of common signs in both languages.
# Maps language code -> {phrase -> video path}. In this demo the paths are
# placeholders: tokenize_text only checks membership, and generate_sign_video
# renders an animation instead of loading these files.
# In a production app, these would link to pre-recorded videos or 3D animations.
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.mp4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}
def detect_language(text):
"""Detect if text is primarily English or Arabic"""
if not text:
return "unknown"
# Simple detection by character set
arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
arabic_count = sum(1 for char in text if char in arabic_chars)
english_count = sum(1 for char in text if char in english_chars)
if arabic_count > english_count:
return "ar"
elif english_count > 0:
return "en"
else:
return "unknown"
def translate_text(text, source_lang, target_lang, translations=None):
    """Translate known words/phrases between English and Arabic.

    Whole-word dictionary substitution between "en" and "ar". Text is
    lowercased before matching; words not in the dictionary pass through
    unchanged. Returns the input untouched when source equals target or
    when no translation path exists for the language pair.

    translations: optional {english: arabic} mapping; defaults to the
    module-level TRANSLATIONS table.
    """
    if source_lang == target_lang:
        return text
    if source_lang == "en" and target_lang == "ar":
        invert = False
    elif source_lang == "ar" and target_lang == "en":
        invert = True
    else:
        return text  # no translation path
    if translations is None:
        translations = TRANSLATIONS
    if invert:
        pairs = [(ar, en) for en, ar in translations.items()]
    else:
        pairs = list(translations.items())
    result = text.lower()
    # Longest source phrase first so "how can i help you" wins over "help";
    # \b word boundaries stop short keys from rewriting the inside of longer
    # words (the original used bare str.replace, turning "nothing" into
    # "لاthing"). \w matches Arabic letters, so \b works for both scripts.
    for src, dst in sorted(pairs, key=lambda p: len(p[0]), reverse=True):
        result = re.sub(r"\b" + re.escape(src) + r"\b", dst, result)
    return result
def tokenize_text(text, language, sign_dict=None):
    """Split *text* into greedy longest-match tokens against the sign dictionary.

    English input is lowercased before matching; Arabic is matched as-is.
    Multi-word phrases (up to 3 words) present in the dictionary are kept as
    single tokens; any word with no dictionary phrase falls back to itself.

    sign_dict: optional {phrase: video_path} mapping; defaults to
    SIGN_DICT[language] (empty mapping for unknown languages).

    The original had two copy-pasted branches differing only in .lower();
    they are unified here.
    """
    if sign_dict is None:
        sign_dict = SIGN_DICT.get(language, {})
    words = text.split() if language == "ar" else text.lower().split()
    tokens = []
    i = 0
    while i < len(words):
        # Try the longest candidate phrase (3 words) first, then shorter.
        for span in range(min(3, len(words) - i), 0, -1):
            candidate = " ".join(words[i:i + span])
            if candidate in sign_dict:
                tokens.append(candidate)
                i += span
                break
        else:
            # No phrase matched: emit the single word unchanged.
            tokens.append(words[i])
            i += 1
    return tokens
def generate_default_sign_video(text, output_path, language="en"):
    """Write a ~2 s fallback clip showing *text* centered on black.

    Used when no sign animation is available for a token. Writes 640x480
    mp4 at 30 fps to *output_path* and returns that path. The *language*
    argument is currently unused. NOTE(review): cv2.putText has no Arabic
    text shaping — non-Latin input will render as placeholder glyphs;
    confirm this is acceptable for "ar" tokens.
    """
    width, height = 640, 480
    fps, seconds = 30, 2
    writer = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
    )
    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = 1
    white = (255, 255, 255)
    thickness = 2
    # Center the caption using its rendered pixel size.
    (text_w, text_h), _baseline = cv2.getTextSize(text, font, scale, thickness)
    origin = ((width - text_w) // 2, (height + text_h) // 2)
    # Every frame is identical, so draw once and emit fps * seconds copies.
    frame = np.zeros((height, width, 3), dtype=np.uint8)
    cv2.putText(frame, text, origin, font, scale, white, thickness)
    for _ in range(fps * seconds):
        writer.write(frame)
    writer.release()
    return output_path
def create_avatar_animation(text, output_path, language="en", style="3D"):
    """Render a 3-second animated "signing avatar" clip for *text*.

    style == "3D": gradient background with a blocky grey avatar whose hand
    circles orbit the torso; any other style: flat light background with a
    2D stick-figure whose arms rotate. The current token is captioned at
    the bottom of every frame, and Arabic input gets a small "RTL" badge.
    Writes 640x480 mp4 at 30 fps to *output_path* and returns that path.
    """
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    frames = fps * duration
    font = cv2.FONT_HERSHEY_SIMPLEX
    grey = (200, 200, 200)
    black = (0, 0, 0)

    # Both backgrounds are frame-invariant, so build them once up front.
    # The original recomputed the 3D gradient pixel-by-pixel in pure Python
    # for every frame (~300k pixels x 90 frames); this vectorized version
    # produces identical pixel values and is orders of magnitude faster.
    if style == "3D":
        gx, gy = np.meshgrid(np.arange(width), np.arange(height))
        background = np.stack(
            [
                (100 + 50 * (gx / width)).astype(np.uint8),
                (60 + 30 * (gy / height)).astype(np.uint8),
                (120 + 40 * ((gx + gy) / (width + height))).astype(np.uint8),
            ],
            axis=-1,
        )
    else:
        background = np.full((height, width, 3), 240, dtype=np.uint8)

    for i in range(frames):
        frame = background.copy()
        t = i / frames  # animation phase in [0, 1)
        if style == "3D":
            # Blocky torso and head.
            cv2.rectangle(frame, (width // 2 - 50, height // 2 - 100),
                          (width // 2 + 50, height // 2 + 100), grey, -1)
            cv2.circle(frame, (width // 2, height // 2 - 150), 50, grey, -1)
            # Hands orbit symmetrically about the torso.
            dx = 50 * np.sin(t * 2 * np.pi)
            dy = 50 * np.cos(t * 2 * np.pi)
            left_hand = (int(width // 2 - 100 - dx), int(height // 2 - dy))
            right_hand = (int(width // 2 + 100 + dx), int(height // 2 - dy))
            cv2.circle(frame, left_hand, 20, grey, -1)
            cv2.circle(frame, right_hand, 20, grey, -1)
        else:
            # 2D stick-figure body and head.
            cv2.line(frame, (width // 2, height // 2 - 100),
                     (width // 2, height // 2 + 50), black, 3)
            cv2.circle(frame, (width // 2, height // 2 - 120), 20, black, 2)
            # Arms rotate at the same rate, 90 degrees out of phase.
            angle_left = t * 2 * np.pi
            angle_right = angle_left + np.pi / 2
            shoulder = (width // 2, height // 2 - 70)
            left_tip = (int(shoulder[0] - 60 * np.cos(angle_left)),
                        int(shoulder[1] + 60 * np.sin(angle_left)))
            right_tip = (int(shoulder[0] + 60 * np.cos(angle_right)),
                         int(shoulder[1] + 60 * np.sin(angle_right)))
            cv2.line(frame, shoulder, left_tip, black, 2)
            cv2.line(frame, shoulder, right_tip, black, 2)
        # Caption the sign currently being animated.
        cv2.putText(frame, text, (width // 2 - 100, height - 50), font, 1, black, 2)
        if language == "ar":
            # Right-to-left indicator badge.
            cv2.putText(frame, "RTL", (width - 70, 30), font, 0.7, black, 1)
        video.write(frame)
    video.release()
    return output_path
def generate_sign_video(tokens, language, output_format="3D"):
    """Create a sign-language clip for *tokens* and return its file path.

    Demo implementation: renders an avatar animation of the FIRST token
    only, into a uniquely named temp file. A real implementation would look
    up each token's pre-recorded clip in SIGN_DICT and concatenate them.

    The original branched on SIGN_DICT membership, but every branch issued
    the exact same create_avatar_animation call, so the lookup was dead
    code — collapsed here without behavior change.
    """
    output_path = os.path.join(
        tempfile.gettempdir(), f"sign_output_{uuid.uuid4()}.mp4"
    )
    label = tokens[0] if tokens else "No tokens"
    create_avatar_animation(label, output_path, language, output_format)
    return output_path
def translate_to_sign(text, output_format="3D"):
    """Translate *text* into a sign-language video.

    Detects the input language, tokenizes against the sign dictionary, and
    renders the video. Returns (video_path, status_message); video_path is
    None on empty input, unknown language, or any processing error.
    """
    if not text:
        return None, ""
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."
    try:
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."
        video_path = generate_sign_video(tokens, language, output_format)
    except Exception as exc:
        message = str(exc)
        print(f"Error during translation: {message}")
        return None, f"Error during translation: {message}"
    language_name = "English" if language == "en" else "Arabic"
    return video_path, f"Translated {language_name}: \"{text}\" to sign language."
# Create the Gradio interface: text input + avatar-style picker on the left,
# generated sign-language video + status message on the right.
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)
        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )
    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?", "3D"],
            ["Please wait while I check your account.", "3D"],
            ["Thank you for your patience.", "3D"],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
            ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
            ["شكرا لصبرك.", "3D"]
        ],
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output],
        fn=translate_to_sign
    )
    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    # Clear resets the input box and reports it in the status field.
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )
# Launch the app
if __name__ == "__main__":
    # Start the local Gradio server only when executed directly.
    demo.launch()