import os
import sys
import string
import gradio as gr
import requests
import json
from datetime import datetime
import tempfile
import uuid
# Install required packages if not already installed
try:
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
except ImportError:
    print("Installing required packages...")
    os.system("pip install mediapipe opencv-python numpy googletrans==4.0.0-rc1 --quiet")
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
TITLE = "Multilingual Sign Language Customer Assistant"
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
The system automatically detects the input language and generates appropriate sign language visuals.
**Features:**
- Supports both English and Arabic text
- Uses SignDict for English sign language vocabulary
- Uses ArSL for Arabic sign language
- Perfect for customer service and assistance scenarios
"""
# Initialize the translation components
translator = Translator()
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# SignDict - dictionary of common signs in both languages
# In a production app, these would link to pre-recorded videos or 3D animations
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.mp4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}
def detect_language(text):
    """Detect whether text is primarily English or Arabic"""
    if not text:
        return "unknown"
    # Simple detection by character set
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)
    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"
def tokenize_text(text, language):
    """Split text into tokens/phrases that can be matched to signs"""
    # English matching is case-insensitive; Arabic is matched as written
    raw_tokens = text.split() if language == "ar" else text.lower().split()
    # Strip Latin and Arabic punctuation so e.g. "hello," still matches "hello"
    tokens = [t.strip(string.punctuation + "؟،؛") for t in raw_tokens]
    tokens = [t for t in tokens if t]
    phrases = []
    i = 0
    while i < len(tokens):
        # Try to match the longest phrase first (up to 3 words)
        matched = False
        for j in range(min(3, len(tokens) - i), 0, -1):
            phrase = " ".join(tokens[i:i+j])
            if phrase in SIGN_DICT[language]:
                phrases.append(phrase)
                i += j
                matched = True
                break
        if not matched:
            phrases.append(tokens[i])
            i += 1
    return phrases
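# Worked example of the greedy longest-match above: for the English input
# "thank you for your help", the two-word phrase "thank you" is found in
# SIGN_DICT first, so the result is ["thank you", "for", "your", "help"].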
def translate_if_needed(text, source_lang, target_lang):
    """Translate text if it is not already in the target language.

    (Not used in the demo flow below; kept as a utility for fuller pipelines.)
    """
    if source_lang == target_lang:
        return text
    try:
        translation = translator.translate(text, src=source_lang, dest=target_lang)
        return translation.text
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return text
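# Illustrative call (requires network access; googletrans output may vary):
#   translate_if_needed("hello", "en", "ar")  # Arabic translation, or "hello" on failure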
def generate_default_sign_video(text, output_path, language="en"):
"""Generate a simple video with the text when no sign is available"""
# Create a black frame with text
height, width = 480, 640
fps = 30
seconds = 2
# Create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
# Create frames with text
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
font_color = (255, 255, 255) # White
line_type = 2
# Text positioning
text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
text_x = (width - text_size[0]) // 2
text_y = (height + text_size[1]) // 2
# Write frames
for _ in range(fps * seconds):
frame = np.zeros((height, width, 3), dtype=np.uint8)
cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
video.write(frame)
video.release()
return output_path
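# Illustrative usage (output path is arbitrary):
#   generate_default_sign_video("account", os.path.join(tempfile.gettempdir(), "account.mp4"))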
def create_avatar_animation(text, output_path, language="en"):
"""Create a 3D avatar animation for the sign (simplified version)"""
# In a real implementation, this would use a 3D avatar system
# Here we'll just simulate it with a basic animation
width, height = 640, 480
fps = 30
duration = 3 # seconds
# Create video writer
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
# Create a simple animation with hands
frames = fps * duration
for i in range(frames):
# Create a dark blue background
frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([100, 60, 20], dtype=np.uint8)
# Draw a simple avatar body
cv2.rectangle(frame, (width//2-50, height//2-100), (width//2+50, height//2+100), (200, 200, 200), -1)
cv2.circle(frame, (width//2, height//2-150), 50, (200, 200, 200), -1)
# Animate hands based on frame number
t = i / frames
# Left hand movement
x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
# Right hand movement
x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
# Draw hands
cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
# Add text with current sign
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.putText(frame, text, (width//2-100, height-50), font, 1, (255, 255, 255), 2)
if language == "ar":
# Right-to-left indicator
cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (255, 255, 255), 1)
video.write(frame)
video.release()
return output_path
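# Note: t runs from 0 to 1 over the clip, so the sin/cos terms above move each
# hand through exactly one full circular oscillation during the 3-second video.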
def generate_sign_video(tokens, language, output_format="3D"):
"""Generate sign language video for the given tokens"""
# For each token, either find a pre-recorded video or generate one
temp_dir = tempfile.gettempdir()
output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
# In a real implementation, this would concatenate actual sign videos
# For this demo, we'll create a simple animation
if language in SIGN_DICT and tokens[0] in SIGN_DICT[language]:
# In a real implementation, this would load the video file
# For demo purposes, we'll create an animation
create_avatar_animation(tokens[0], output_path, language)
else:
# Generate a default video with text
generate_default_sign_video(tokens[0], output_path, language)
return output_path
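# Hedged sketch (not wired into the demo flow): a fuller implementation would
# render or load one clip per token and stitch them together. This helper
# concatenates mp4 clips frame-by-frame with OpenCV, assuming all clips share
# the same resolution and frame rate; the function name is our own.
def concatenate_sign_videos(clip_paths, output_path, fps=30):
    """Append the frames of each clip, in order, into a single output video."""
    writer = None
    for path in clip_paths:
        cap = cv2.VideoCapture(path)
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            if writer is None:
                # Lazily size the writer from the first decoded frame
                h, w = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
            writer.write(frame)
        cap.release()
    if writer is not None:
        writer.release()
    return output_path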
def translate_to_sign(text, output_format="3D"):
"""Main function to translate text to sign language video"""
if not text:
return None, ""
# Detect the input language
language = detect_language(text)
if language == "unknown":
return None, "Could not determine the language. Please use English or Arabic."
try:
# Tokenize the text
tokens = tokenize_text(text, language)
if not tokens:
return None, "No translatable tokens found."
# Generate sign language video
video_path = generate_sign_video(tokens, language, output_format)
# Prepare status message
if language == "en":
status = f"Translated English: \"{text}\" to sign language."
else:
status = f"Translated Arabic: \"{text}\" to sign language."
return video_path, status
except Exception as e:
error_msg = str(e)
print(f"Error during translation: {error_msg}")
return None, f"Error during translation: {error_msg}"
# Create the Gradio interface
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)
        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )
    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?"],
            ["Please wait while I check your account."],
            ["Thank you for your patience."],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟"],
            ["من فضلك انتظر بينما أتحقق من حسابك."],
            ["شكرا لصبرك."]
        ],
        inputs=[text_input],
        outputs=[video_output, status_output],
        fn=lambda text: translate_to_sign(text)
    )
    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()