import os
import sys
import gradio as gr
import requests
import json
from datetime import datetime
import tempfile
import uuid
import re

# Install required packages if not already installed
try:
    import mediapipe as mp
    import cv2
    import numpy as np
except ImportError:
    print("Installing required packages...")
    os.system("pip install mediapipe opencv-python numpy --quiet")
    import mediapipe as mp
    import cv2
    import numpy as np

TITLE = "Multilingual Sign Language Customer Assistant"
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
The system automatically detects the input language and generates the corresponding sign language visuals.

**Features:**
- Supports both English and Arabic text
- Renders a simple animated avatar (3D or 2D style) to present the signs
- Designed for customer service and assistance scenarios
"""

# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# Dictionary of translations for common customer service phrases
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "help": "مساعدة",
    "yes": "نعم",
    "no": "لا",
    "please": "من فضلك",
    "wait": "انتظر",
    "sorry": "آسف",
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "support": "دعم",
    "information": "معلومات",
    "question": "سؤال",
    "answer": "إجابة",
}

# SignDict - dictionary of common signs in both languages
# In a production app, these would link to pre-recorded videos or 3D animations
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.mp4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}

def detect_language(text):
    """Detect whether text is primarily English or Arabic"""
    if not text:
        return "unknown"

    # Simple detection by character set
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)

    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"

def translate_text(text, source_lang, target_lang):
    """Simple dictionary-based translation"""
    if source_lang == target_lang:
        return text

    # Convert to lowercase for matching
    text_lower = text.lower()

    # For English to Arabic
    if source_lang == "en" and target_lang == "ar":
        for eng, ar in TRANSLATIONS.items():
            text_lower = text_lower.replace(eng, ar)
        return text_lower

    # For Arabic to English
    if source_lang == "ar" and target_lang == "en":
        for eng, ar in TRANSLATIONS.items():
            text_lower = text_lower.replace(ar, eng)
        return text_lower

    return text  # Return original if no translation path
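
# Illustrative use of the phrase-table translation above. Note that str.replace()
# matches substrings, so a key embedded in a longer word can be partially rewritten;
# a production system would use word-boundary matching or a proper MT model.
#   translate_text("thank you", "en", "ar")  -> "شكرا"
#   translate_text("مساعدة", "ar", "en")      -> "help"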

def tokenize_text(text, language):
    """Split text into tokens that can be matched to signs (greedy longest-phrase match)"""
    # Arabic is matched as-is; English is lowercased to match the dictionary keys
    tokens = text.split() if language == "ar" else text.lower().split()

    phrases = []
    i = 0
    while i < len(tokens):
        # Try to match the longest phrase first (up to 3 words)
        matched = False
        for j in range(min(3, len(tokens) - i), 0, -1):
            phrase = " ".join(tokens[i:i+j])
            if phrase in SIGN_DICT[language]:
                phrases.append(phrase)
                i += j
                matched = True
                break
        if not matched:
            phrases.append(tokens[i])
            i += 1
    return phrases
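
# Illustrative tokenization with the greedy longest-match above:
#   tokenize_text("thank you for waiting", "en")  -> ["thank you", "for", "waiting"]
#   tokenize_text("من فضلك انتظر", "ar")           -> ["من فضلك", "انتظر"]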

def generate_default_sign_video(text, output_path, language="en"):
    """Generate a simple video showing the text when no sign is available"""
    # Create black frames with centered text
    height, width = 480, 640
    fps = 30
    seconds = 2

    # Create a VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Text styling
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)  # White
    line_type = 2

    # Text positioning (centered)
    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    # Write frames
    for _ in range(fps * seconds):
        frame = np.zeros((height, width, 3), dtype=np.uint8)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
        video.write(frame)

    video.release()
    return output_path

def create_avatar_animation(text, output_path, language="en", style="3D"):
    """Create a simple avatar animation for the sign (3D or 2D style, simplified placeholder)"""
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds

    # Create video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Precompute the background once; it is identical for every frame
    if style == "3D":
        # Gradient background, vectorized with numpy instead of per-pixel loops
        xv, yv = np.meshgrid(np.arange(width), np.arange(height))
        background = np.zeros((height, width, 3), dtype=np.uint8)
        background[..., 0] = (100 + 50 * (xv / width)).astype(np.uint8)
        background[..., 1] = (60 + 30 * (yv / height)).astype(np.uint8)
        background[..., 2] = (120 + 40 * ((xv + yv) / (width + height))).astype(np.uint8)
    else:
        # Simple solid light-grey background for the 2D style
        background = np.full((height, width, 3), 240, dtype=np.uint8)

    frames = fps * duration
    for i in range(frames):
        frame = background.copy()

        if style == "3D":
            # 3D-style avatar: body and head
            cv2.rectangle(frame, (width//2 - 50, height//2 - 100), (width//2 + 50, height//2 + 100), (200, 200, 200), -1)
            cv2.circle(frame, (width//2, height//2 - 150), 50, (200, 200, 200), -1)

            # Animate hands based on frame number
            t = i / frames
            # Left hand movement
            x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
            y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            # Right hand movement
            x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
            y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            # Draw hands
            cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
            cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
        else:
            # 2D-style signer: simplified stick figure
            cv2.line(frame, (width//2, height//2 - 100), (width//2, height//2 + 50), (0, 0, 0), 3)  # Body
            cv2.circle(frame, (width//2, height//2 - 120), 20, (0, 0, 0), 2)  # Head

            # Animated arms for signing
            t = i / frames
            angle1 = t * 2 * np.pi
            angle2 = t * 2 * np.pi + np.pi / 2
            # Left arm
            x1, y1 = width//2, height//2 - 70
            x2 = int(x1 - 60 * np.cos(angle1))
            y2 = int(y1 + 60 * np.sin(angle1))
            cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)
            # Right arm
            x3, y3 = width//2, height//2 - 70
            x4 = int(x3 + 60 * np.cos(angle2))
            y4 = int(y3 + 60 * np.sin(angle2))
            cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)

        # Caption with the current sign. Note: cv2.putText uses Hershey fonts,
        # which cannot render Arabic script; non-ASCII characters appear as '?'.
        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(frame, text, (width//2 - 100, height - 50), font, 1, (0, 0, 0), 2)
        if language == "ar":
            # Right-to-left indicator
            cv2.putText(frame, "RTL", (width - 70, 30), font, 0.7, (0, 0, 0), 1)

        video.write(frame)

    video.release()
    return output_path

def generate_sign_video(tokens, language, output_format="3D"):
    """Generate a sign language video for the given tokens"""
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

    # In a real implementation, this would look up each token in SIGN_DICT and
    # concatenate the pre-recorded sign clips. For this demo, we render a simple
    # avatar animation for the first token only.
    if tokens:
        create_avatar_animation(tokens[0], output_path, language, output_format)
    else:
        create_avatar_animation("No tokens", output_path, language, output_format)

    return output_path

def translate_to_sign(text, output_format="3D"):
    """Main function to translate text to a sign language video"""
    if not text:
        return None, ""

    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."

    try:
        # Tokenize the text
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."

        # Generate the sign language video
        video_path = generate_sign_video(tokens, language, output_format)

        # Prepare the status message
        if language == "en":
            status = f"Translated English: \"{text}\" to sign language."
        else:
            status = f"Translated Arabic: \"{text}\" to sign language."

        return video_path, status
    except Exception as e:
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"

# Create the Gradio interface
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")

            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )

    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?", "3D"],
            ["Please wait while I check your account.", "3D"],
            ["Thank you for your patience.", "3D"],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
            ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
            ["شكرا لصبرك.", "3D"]
        ],
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output],
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()