import os
import sys
import tempfile
import uuid

import gradio as gr

# Install required packages if they are not already available
try:
    import mediapipe as mp
    import cv2
    import numpy as np
except ImportError:
    print("Installing required packages...")
    os.system(f"{sys.executable} -m pip install mediapipe opencv-python numpy --quiet")
    import mediapipe as mp
    import cv2
    import numpy as np

TITLE = "Multilingual Sign Language Customer Assistant"
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
The system automatically detects the input language and generates appropriate sign language visuals.
**Features:**
- Supports both English and Arabic text
- Uses a simple animated avatar (3D or 2D style) to illustrate the signs
- Perfect for customer service and assistance scenarios
"""
# Initialize MediaPipe solutions (loaded here for future landmark-based signing; the current demo does not yet use them)
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose
# Dictionary of translations for common customer service phrases
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "help": "مساعدة",
    "yes": "نعم",
    "no": "لا",
    "please": "من فضلك",
    "wait": "انتظر",
    "sorry": "آسف",
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "support": "دعم",
    "information": "معلومات",
    "question": "سؤال",
    "answer": "إجابة",
}
# SignDict - dictionary of common signs in both languages
# In a production app, these would link to pre-recorded videos or 3D animations
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.mp4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}
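
# Note: the clip paths above are lookup targets only; this demo does not ship
# the video files, so playback always falls back to the generated avatar animation.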
def detect_language(text):
    """Detect if text is primarily English or Arabic"""
    if not text:
        return "unknown"

    # Simple detection by character set
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)

    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"
def translate_text(text, source_lang, target_lang):
    """Simple dictionary-based translation.

    Note: this does naive substring replacement, so short entries (e.g. "no")
    can match inside longer words; a production system would translate
    token-by-token or call a proper MT service.
    """
    if source_lang == target_lang:
        return text

    # Convert to lowercase for matching
    text_lower = text.lower()

    # For English to Arabic
    if source_lang == "en" and target_lang == "ar":
        for eng, ar in TRANSLATIONS.items():
            text_lower = text_lower.replace(eng, ar)
        return text_lower

    # For Arabic to English
    if source_lang == "ar" and target_lang == "en":
        for eng, ar in TRANSLATIONS.items():
            text_lower = text_lower.replace(ar, eng)
        return text_lower

    return text  # Return original if no translation path
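
# Example (illustrative): translate_text("thank you", "en", "ar") -> "شكرا".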
def tokenize_text(text, language):
    """Split text into tokens that can be matched to signs, greedily
    matching the longest known phrase (up to three words) first."""
    # Arabic is matched as-is; English is lowercased before matching
    tokens = text.split() if language == "ar" else text.lower().split()
    phrases = []
    i = 0
    while i < len(tokens):
        matched = False
        for j in range(min(3, len(tokens) - i), 0, -1):
            phrase = " ".join(tokens[i:i + j])
            if phrase in SIGN_DICT[language]:
                phrases.append(phrase)
                i += j
                matched = True
                break
        if not matched:
            phrases.append(tokens[i])
            i += 1
    return phrases
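
# Example (illustrative): tokenize_text("thank you for your help", "en")
# -> ["thank you", "for", "your", "help"]; "thank you" is kept as one
# phrase because it has an entry in SIGN_DICT["en"].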
def generate_default_sign_video(text, output_path, language="en"):
    """Generate a simple video with the text when no sign is available.

    Kept as a fallback; the current demo flow always uses
    create_avatar_animation instead.
    """
    # Create a black frame with text
    height, width = 480, 640
    fps = 30
    seconds = 2

    # Create a VideoWriter object ('mp4v' is widely available in OpenCV builds)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Text styling
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)  # White
    thickness = 2

    # Center the text in the frame
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    # Write frames
    for _ in range(fps * seconds):
        frame = np.zeros((height, width, 3), dtype=np.uint8)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, thickness)
        video.write(frame)

    video.release()
    return output_path
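
# Example (illustrative): generate_default_sign_video("hello", "/tmp/hello.mp4")
# writes a 2-second 640x480 clip with the word centered on a black background.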
def create_avatar_animation(text, output_path, language="en", style="3D"):
    """Create a simple placeholder avatar animation for the sign."""
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds

    # Create video writer ('mp4v' is widely available; browsers may prefer H.264)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Precompute the background once; it does not change between frames
    if style == "3D":
        # Gradient background, vectorized with NumPy (a per-pixel Python loop
        # here would take minutes per clip)
        xv, yv = np.meshgrid(np.arange(width, dtype=np.float32),
                             np.arange(height, dtype=np.float32))
        background = np.stack([
            100 + 50 * (xv / width),
            60 + 30 * (yv / height),
            120 + 40 * ((xv + yv) / (width + height)),
        ], axis=-1).astype(np.uint8)
    else:
        # Simple solid background for 2D
        background = np.full((height, width, 3), 240, dtype=np.uint8)

    frames = fps * duration
    font = cv2.FONT_HERSHEY_SIMPLEX
    for i in range(frames):
        frame = background.copy()
        t = i / frames

        if style == "3D":
            # 3D-style avatar: body and head
            cv2.rectangle(frame, (width//2 - 50, height//2 - 100),
                          (width//2 + 50, height//2 + 100), (200, 200, 200), -1)
            cv2.circle(frame, (width//2, height//2 - 150), 50, (200, 200, 200), -1)

            # Animate hands on a circular path based on the frame number
            x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
            y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
            y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
            cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
        else:
            # 2D-style signer: simplified stick figure with rotating arms
            cv2.line(frame, (width//2, height//2 - 100), (width//2, height//2 + 50), (0, 0, 0), 3)  # Body
            cv2.circle(frame, (width//2, height//2 - 120), 20, (0, 0, 0), 2)  # Head

            angle1 = t * 2 * np.pi
            angle2 = t * 2 * np.pi + np.pi / 2
            # Left arm
            x1, y1 = width//2, height//2 - 70
            x2 = int(x1 - 60 * np.cos(angle1))
            y2 = int(y1 + 60 * np.sin(angle1))
            cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)
            # Right arm
            x4 = int(width//2 + 60 * np.cos(angle2))
            y4 = int(height//2 - 70 + 60 * np.sin(angle2))
            cv2.line(frame, (width//2, height//2 - 70), (x4, y4), (0, 0, 0), 2)

        # Caption the current sign. Note: cv2.putText draws only a basic
        # ASCII glyph set, so Arabic text will not render correctly here;
        # a production build would rasterize text with an Arabic-capable font.
        cv2.putText(frame, text, (width//2 - 100, height - 50), font, 1, (0, 0, 0), 2)
        if language == "ar":
            cv2.putText(frame, "RTL", (width - 70, 30), font, 0.7, (0, 0, 0), 1)  # Right-to-left indicator

        video.write(frame)

    video.release()
    return output_path
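
# Example (illustrative): create_avatar_animation("hello", "/tmp/sign.mp4", "en", "3D")
# writes a 3-second clip of the placeholder avatar with animated hands.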
def generate_sign_video(tokens, language, output_format="3D"):
    """Generate a sign language video for the given tokens."""
    # A real implementation would look up a pre-recorded clip for each token
    # in SIGN_DICT and concatenate the results; this demo animates a
    # placeholder avatar for the first token only.
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

    label = tokens[0] if tokens else "No tokens"
    create_avatar_animation(label, output_path, language, output_format)
    return output_path
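
# A minimal sketch of how per-token clips could be concatenated once real
# sign videos exist at the SIGN_DICT paths. This helper is hypothetical and
# not wired into the demo; it assumes every path in clip_paths is a readable
# video file.
def concatenate_sign_videos(clip_paths, output_path, size=(640, 480), fps=30):
    """Concatenate sign clips into one video, resizing frames to a common size."""
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, fourcc, fps, size)
    for path in clip_paths:
        capture = cv2.VideoCapture(path)
        while True:
            ok, frame = capture.read()
            if not ok:
                break  # end of this clip
            writer.write(cv2.resize(frame, size))
        capture.release()
    writer.release()
    return output_path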
def translate_to_sign(text, output_format="3D"):
    """Main function to translate text to sign language video"""
    if not text:
        return None, ""

    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."

    try:
        # Tokenize the text
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."

        # Generate sign language video
        video_path = generate_sign_video(tokens, language, output_format)

        # Prepare status message
        language_name = "English" if language == "en" else "Arabic"
        status = f"Translated {language_name}: \"{text}\" to sign language."
        return video_path, status
    except Exception as e:
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"
# Create the Gradio interface
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")

            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )

    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?", "3D"],
            ["Please wait while I check your account.", "3D"],
            ["Thank you for your patience.", "3D"],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
            ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
            ["شكرا لصبرك.", "3D"]
        ],
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output],
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()