import os
import sys
import gradio as gr
import requests
import json
from datetime import datetime
import tempfile
import uuid

# Install required packages if not already installed
try:
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
except ImportError:
    print("Installing required packages...")
    os.system("pip install mediapipe opencv-python numpy googletrans==4.0.0-rc1 --quiet")
    import mediapipe as mp
    import cv2
    import numpy as np
    from googletrans import Translator
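# Note: on Hugging Face Spaces these dependencies would normally be pinned in
# requirements.txt; the runtime pip install above is only a fallback for local runs.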
TITLE = "Multilingual Sign Language Customer Assistant" | |
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance. | |
The system automatically detects the input language and generates appropriate sign language visuals. | |
**Features:** | |
- Supports both English and Arabic text | |
- Uses SignDict for English sign language vocabulary | |
- Uses ArSL for Arabic sign language | |
- Perfect for customer service and assistance scenarios | |
""" | |
# Initialize the translation components
translator = Translator()

# MediaPipe handles (not used by the simplified demo renderer below, but kept
# for future landmark-based sign rendering)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# SignDict - dictionary of common signs in both languages
# In a production app, these would link to pre-recorded videos or 3D animations
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.m4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}
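# For example, SIGN_DICT["en"]["thank you"] maps to "signs/en/thank_you.mp4".
# The video files themselves are not bundled with this demo, so a lookup hit
# only selects which renderer is used further below.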
def detect_language(text):
    """Detect if text is primarily English or Arabic"""
    if not text:
        return "unknown"

    # Simple detection by character set
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)

    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"
def tokenize_text(text, language):
    """Split text into tokens that can be matched to signs.

    Greedy matching: the longest phrase (up to three words) found in
    SIGN_DICT wins; unmatched words fall through as single tokens.
    """
    if language == "en":
        text = text.lower()  # English dictionary keys are stored lowercase
    tokens = text.split()

    phrases = []
    i = 0
    while i < len(tokens):
        # Try to match the longest phrase first
        matched = False
        for j in range(min(3, len(tokens) - i), 0, -1):
            phrase = " ".join(tokens[i:i + j])
            if phrase in SIGN_DICT[language]:
                phrases.append(phrase)
                i += j
                matched = True
                break
        if not matched:
            phrases.append(tokens[i])
            i += 1
    return phrases
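# Greedy-matching example: tokenize_text("Thank you for your help", "en")
# returns ["thank you", "for", "your", "help"] -- "thank you" is matched as a
# two-word phrase before the single-word fallback applies.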
def translate_if_needed(text, source_lang, target_lang):
    """Translate text if it's not already in the target language.

    (Not wired into the UI flow yet; useful when a sign exists in only one
    language and the input must be translated first.)
    """
    if source_lang == target_lang:
        return text
    try:
        translation = translator.translate(text, src=source_lang, dest=target_lang)
        return translation.text
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return text
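# Example (requires network access, since googletrans calls an online API):
# translate_if_needed("help", "en", "ar") should return something like "مساعدة";
# on any failure it falls back to returning the input text unchanged.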
def generate_default_sign_video(text, output_path, language="en"):
    """Generate a simple video with the text when no sign is available"""
    # Create a black frame with text
    height, width = 480, 640
    fps = 30
    seconds = 2

    # Create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Text styling (note: OpenCV's Hershey fonts cannot shape Arabic script,
    # so Arabic tokens may render as placeholder glyphs)
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)  # White
    thickness = 2

    # Center the text
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    # Write frames
    for _ in range(fps * seconds):
        frame = np.zeros((height, width, 3), dtype=np.uint8)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, thickness)
        video.write(frame)

    video.release()
    return output_path
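# Usage sketch: generate_default_sign_video("help", "/tmp/help.mp4") writes a
# two-second 640x480 clip showing the word centered on black, then returns the path.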
def create_avatar_animation(text, output_path, language="en"):
    """Create a 3D avatar animation for the sign (simplified version)"""
    # In a real implementation, this would use a 3D avatar system;
    # here we simulate it with a basic animation
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds

    # Create video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Create a simple animation with hands
    frames = fps * duration
    for i in range(frames):
        # Dark blue background (BGR)
        frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([100, 60, 20], dtype=np.uint8)

        # Draw a simple avatar body
        cv2.rectangle(frame, (width//2 - 50, height//2 - 100), (width//2 + 50, height//2 + 100), (200, 200, 200), -1)
        cv2.circle(frame, (width//2, height//2 - 150), 50, (200, 200, 200), -1)

        # Animate hands based on frame number
        t = i / frames

        # Left hand movement
        x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
        y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))

        # Right hand movement
        x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
        y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))

        # Draw hands
        cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
        cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)

        # Add text with the current sign
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(frame, text, (width//2 - 100, height - 50), font, 1, (255, 255, 255), 2)

        if language == "ar":
            # Right-to-left indicator
            cv2.putText(frame, "RTL", (width - 70, 30), font, 0.7, (255, 255, 255), 1)

        video.write(frame)

    video.release()
    return output_path
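# The hand circles trace mirrored circular paths (radius 50 px, one full cycle
# per clip); a production system would drive an articulated avatar rig instead.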
def generate_sign_video(tokens, language, output_format="3D"):
    """Generate a sign language video for the given tokens.

    Demo limitation: only the first token is rendered, and output_format is
    accepted but not yet used. A full implementation would look up and
    concatenate a pre-recorded clip (or 3D animation) per token.
    """
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

    if language in SIGN_DICT and tokens[0] in SIGN_DICT[language]:
        # Known sign: in a real implementation this would load the video file
        # from SIGN_DICT; for demo purposes we create an avatar animation
        create_avatar_animation(tokens[0], output_path, language)
    else:
        # Unknown sign: fall back to a plain text video
        generate_default_sign_video(tokens[0], output_path, language)

    return output_path
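# For example, generate_sign_video(["hello"], "en") renders the avatar
# animation, while a made-up token such as generate_sign_video(["xyzzy"], "en")
# falls back to the plain-text clip.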
def translate_to_sign(text, output_format="3D"):
    """Main function to translate text to sign language video"""
    if not text:
        return None, ""

    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."

    try:
        # Tokenize the text
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."

        # Generate sign language video
        video_path = generate_sign_video(tokens, language, output_format)

        # Prepare status message
        language_name = "English" if language == "en" else "Arabic"
        status = f"Translated {language_name}: \"{text}\" to sign language."

        return video_path, status
    except Exception as e:
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"
# Create the Gradio interface
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")

            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )

    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?"],
            ["Please wait while I check your account."],
            ["Thank you for your patience."],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟"],
            ["من فضلك انتظر بينما أتحقق من حسابك."],
            ["شكرا لصبرك."]
        ],
        inputs=[text_input],
        outputs=[video_output, status_output],
        fn=lambda text: translate_to_sign(text)
    )
    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()