Spaces:

walaa2022
/

signlanguage

Sleeping

App Files Files Community

signlanguage / multilingual-sign-app.py

walaa2022

Upload multilingual-sign-app.py

80bb9b6 verified 5 months ago

raw

history blame

12.4 kB

	import os
	import sys
	import gradio as gr
	import requests
	import json
	from datetime import datetime
	import tempfile
	import uuid

	# Third-party dependencies. If any of them is missing, install the whole set
	# once and retry the imports.
	try:
	    import mediapipe as mp
	    import cv2
	    import numpy as np
	    from googletrans import Translator
	except ImportError:
	    import subprocess

	    print("Installing required packages...")
	    # Invoke pip through the running interpreter so the packages land in the
	    # environment this script actually uses, pass arguments as a list (no
	    # shell), and fail loudly if the installation itself fails.
	    subprocess.check_call(
	        [
	            sys.executable, "-m", "pip", "install",
	            "mediapipe", "opencv-python", "numpy", "googletrans==4.0.0-rc1",
	            "--quiet",
	        ]
	    )
	    import mediapipe as mp
	    import cv2
	    import numpy as np
	    from googletrans import Translator

	# Page title and Markdown description; both are passed to gr.Markdown when the
	# Gradio interface is built, and TITLE is also used as the Blocks title.
	TITLE = "Multilingual Sign Language Customer Assistant"
	DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
	The system automatically detects the input language and generates appropriate sign language visuals.

	Features:
	- Supports both English and Arabic text
	- Uses SignDict for English sign language vocabulary
	- Uses ArSL for Arabic sign language
	- Perfect for customer service and assistance scenarios
	"""

	# Initialize the translation components
	# `translator` is the shared googletrans client used by translate_if_needed().
	translator = Translator()
	# Shortcuts to MediaPipe solution modules.
	# NOTE(review): none of the mp_* names below are referenced in the visible
	# part of this file — confirm whether they are still needed.
	mp_hands = mp.solutions.hands
	mp_drawing = mp.solutions.drawing_utils
	mp_drawing_styles = mp.solutions.drawing_styles
	mp_pose = mp.solutions.pose

	# SignDict - dictionary of common signs in both languages
	# In a production app, these would link to pre-recorded videos or 3D animations
	#
	# Layout: language code ("en" / "ar") -> {word or phrase -> video path}.
	# English keys are lowercase; keys may contain spaces (e.g. "thank you"),
	# which tokenize_text() matches as phrases of up to three words.
	# The visible code only tests key membership; the paths are never opened.
	SIGN_DICT = {
	"en": {
	"hello": "signs/en/hello.mp4",
	"welcome": "signs/en/welcome.mp4",
	"thank you": "signs/en/thank_you.mp4",
	"help": "signs/en/help.mp4",
	"yes": "signs/en/yes.mp4",
	"no": "signs/en/no.mp4",
	"please": "signs/en/please.mp4",
	"wait": "signs/en/wait.mp4",
	"sorry": "signs/en/sorry.mp4",
	"how": "signs/en/how.mp4",
	"what": "signs/en/what.mp4",
	"where": "signs/en/where.mp4",
	"when": "signs/en/when.mp4",
	"who": "signs/en/who.mp4",
	"why": "signs/en/why.mp4",
	"customer": "signs/en/customer.mp4",
	"service": "signs/en/service.mp4",
	"support": "signs/en/support.mp4",
	"information": "signs/en/information.mp4",
	"question": "signs/en/question.mp4",
	"answer": "signs/en/answer.mp4",
	},
	"ar": {
	"مرحبا": "signs/ar/hello.mp4",
	"أهلا وسهلا": "signs/ar/welcome.mp4",
	"شكرا": "signs/ar/thank_you.mp4",
	"مساعدة": "signs/ar/help.mp4",
	"نعم": "signs/ar/yes.mp4",
	"لا": "signs/ar/no.mp4",
	"من فضلك": "signs/ar/please.mp4",
	"انتظر": "signs/ar/wait.mp4",
	"آسف": "signs/ar/sorry.mp4",
	"كيف": "signs/ar/how.mp4",
	"ماذا": "signs/ar/what.mp4",
	"أين": "signs/ar/where.mp4",
	"متى": "signs/ar/when.mp4",
	"من": "signs/ar/who.mp4",
	"لماذا": "signs/ar/why.mp4",
	"عميل": "signs/ar/customer.mp4",
	"خدمة": "signs/ar/service.mp4",
	"دعم": "signs/ar/support.mp4",
	"معلومات": "signs/ar/information.mp4",
	"سؤال": "signs/ar/question.mp4",
	"إجابة": "signs/ar/answer.mp4",
	}
	}

	def detect_language(text):
	    """Classify text as English ("en"), Arabic ("ar") or "unknown".

	    Letters are counted per script. ASCII A-Z / a-z count as English. The
	    basic Arabic letters U+0621-U+063A and U+0641-U+064A count as Arabic;
	    this range includes alef maksura (U+0649), which a hand-written
	    alphabet string easily misses. Digits, punctuation, whitespace and any
	    other characters are ignored.

	    Args:
	        text: Input string. ``None`` or an empty string yields "unknown".

	    Returns:
	        "ar" when Arabic letters strictly outnumber English letters;
	        otherwise "en" when at least one English letter is present;
	        otherwise "unknown".
	    """
	    if not text:
	        return "unknown"

	    arabic_count = 0
	    english_count = 0
	    for char in text:
	        if "\u0621" <= char <= "\u063a" or "\u0641" <= char <= "\u064a":
	            arabic_count += 1
	        elif "a" <= char <= "z" or "A" <= char <= "Z":
	            english_count += 1

	    if arabic_count > english_count:
	        return "ar"
	    if english_count > 0:
	        return "en"
	    return "unknown"

	def tokenize_text(text, language):
	    """Split text into words/phrases that can be looked up in SIGN_DICT.

	    The text is split on whitespace and leading/trailing punctuation is
	    removed from every word (ASCII punctuation plus the Arabic comma,
	    semicolon and question mark), so that "Hello," matches the "hello"
	    entry. Words that consist only of punctuation are dropped. Non-Arabic
	    text is lower-cased first.

	    At each position the longest run of up to three consecutive words that
	    is a key of ``SIGN_DICT[language]`` is emitted as a single phrase; if
	    nothing matches, the single word is emitted unchanged.

	    Args:
	        text: Text to tokenize.
	        language: Language code selecting the SIGN_DICT vocabulary. A code
	            without a vocabulary simply yields the individual words.

	    Returns:
	        List of matched phrases and unmatched words, in input order.
	    """
	    import string

	    # ASCII punctuation + Arabic comma (U+060C), semicolon (U+061B),
	    # question mark (U+061F).
	    strip_chars = string.punctuation + "\u060c\u061b\u061f"
	    max_phrase_words = 3

	    if language != "ar":
	        text = text.lower()
	    stripped = (raw.strip(strip_chars) for raw in text.split())
	    words = [word for word in stripped if word]
	    vocabulary = SIGN_DICT.get(language, {})

	    phrases = []
	    i = 0
	    while i < len(words):
	        # Try to match the longest phrase first.
	        for span in range(min(max_phrase_words, len(words) - i), 0, -1):
	            candidate = " ".join(words[i:i + span])
	            if candidate in vocabulary:
	                phrases.append(candidate)
	                i += span
	                break
	        else:
	            # No dictionary entry starts here; keep the bare word.
	            phrases.append(words[i])
	            i += 1
	    return phrases

	def translate_if_needed(text, source_lang, target_lang):
	    """Return text in target_lang, calling the translator only when needed.

	    When the source and target language codes are equal the text is
	    returned untouched. Otherwise the module-level ``translator`` is asked
	    to translate it. Any failure is printed and the original text is
	    returned, so this function never raises on translation errors.
	    """
	    if source_lang != target_lang:
	        try:
	            result = translator.translate(text, src=source_lang, dest=target_lang)
	            return result.text
	        except Exception as e:
	            # Best effort: report the problem and fall back to the input.
	            print(f"Translation error: {str(e)}")
	    return text

	def generate_default_sign_video(text, output_path, language="en"):
	    """Write a short placeholder video showing text centred on a black frame.

	    Used when no sign is available for a token. The clip is 640x480 at
	    30 fps, lasts 2 seconds and is encoded with the 'mp4v' fourcc.

	    Args:
	        text: Caption to draw in white.
	            NOTE(review): cv2.putText draws with Hershey fonts; per the
	            OpenCV docs, symbols the font cannot render are replaced by
	            question marks, so Arabic captions will not display correctly.
	        output_path: Destination file path for the video.
	        language: Accepted for signature parity with
	            create_avatar_animation; not used here.

	    Returns:
	        output_path, unchanged.
	    """
	    height, width = 480, 640
	    fps = 30
	    seconds = 2

	    font = cv2.FONT_HERSHEY_SIMPLEX
	    font_scale = 1
	    font_color = (255, 255, 255)  # White
	    thickness = 2

	    # Centre the caption horizontally and vertically.
	    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
	    text_x = (width - text_size[0]) // 2
	    text_y = (height + text_size[1]) // 2

	    # Every frame is identical, so render it once and write it repeatedly.
	    frame = np.zeros((height, width, 3), dtype=np.uint8)
	    cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, thickness)

	    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
	    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
	    try:
	        for _ in range(fps * seconds):
	            video.write(frame)
	    finally:
	        # Always finalize/close the file, even if writing fails part-way.
	        video.release()
	    return output_path

	def create_avatar_animation(text, output_path, language="en"):
	    """Write a simple stand-in "avatar" animation for a sign.

	    This is a simplified placeholder for a real 3D avatar system: a grey
	    body and head on a dark blue background with two circular "hands" that
	    complete one full rotation over the clip, plus the token as a caption.
	    The clip is 640x480 at 30 fps, lasts 3 seconds and is encoded with the
	    'mp4v' fourcc.

	    Args:
	        text: Token shown as a caption near the bottom of the frame.
	        output_path: Destination file path for the video.
	        language: When "ar", an "RTL" marker is drawn in the top-right
	            corner.

	    Returns:
	        output_path, unchanged.
	    """
	    width, height = 640, 480
	    fps = 30
	    duration = 3  # seconds
	    frames = fps * duration

	    font = cv2.FONT_HERSHEY_SIMPLEX
	    avatar_color = (200, 200, 200)
	    white = (255, 255, 255)
	    cx, cy = width // 2, height // 2

	    # Everything except the hands is static, so draw it once on a base frame.
	    # The hands never reach the caption or the RTL marker, and they share the
	    # body's colour, so compositing them last gives the same pixels.
	    base = np.full((height, width, 3), (100, 60, 20), dtype=np.uint8)  # dark blue (BGR)
	    cv2.rectangle(base, (cx - 50, cy - 100), (cx + 50, cy + 100), avatar_color, -1)  # body
	    cv2.circle(base, (cx, cy - 150), 50, avatar_color, -1)  # head
	    cv2.putText(base, text, (cx - 100, height - 50), font, 1, white, 2)
	    if language == "ar":
	        # Right-to-left indicator
	        cv2.putText(base, "RTL", (width - 70, 30), font, 0.7, white, 1)

	    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
	    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
	    try:
	        for i in range(frames):
	            frame = base.copy()

	            # Hands mirror each other horizontally and move in a circle.
	            t = i / frames
	            angle = t * 2 * np.pi
	            dx = 50 * np.sin(angle)
	            dy = 50 * np.cos(angle)
	            left_hand = (int(cx - 100 - dx), int(cy - dy))
	            right_hand = (int(cx + 100 + dx), int(cy - dy))

	            cv2.circle(frame, left_hand, 20, avatar_color, -1)
	            cv2.circle(frame, right_hand, 20, avatar_color, -1)

	            video.write(frame)
	    finally:
	        # Always finalize/close the file, even if a frame fails to render.
	        video.release()
	    return output_path

	def generate_sign_video(tokens, language, output_format="3D"):
	    """Render a sign video for the token list and return the file path.

	    Demo behaviour: only the first token is rendered. If it is a key of
	    ``SIGN_DICT[language]`` the avatar animation is produced, otherwise a
	    plain text placeholder video. The file is created in the system temp
	    directory under a random UUID-based name. ``output_format`` is accepted
	    but not used.
	    """
	    file_name = f"sign_output_{uuid.uuid4()}.mp4"
	    output_path = os.path.join(tempfile.gettempdir(), file_name)

	    # A real implementation would concatenate pre-recorded clips for every
	    # token; here a single generated clip stands in for the whole sentence.
	    has_sign = language in SIGN_DICT and tokens[0] in SIGN_DICT[language]
	    render = create_avatar_animation if has_sign else generate_default_sign_video
	    render(tokens[0], output_path, language)

	    return output_path

	def translate_to_sign(text, output_format="3D"):
	    """Turn input text into a sign-language video; Gradio entry point.

	    Returns:
	        A ``(video_path, status)`` pair. ``video_path`` is ``None`` when
	        the input is empty, the language cannot be determined, there is
	        nothing to translate, or an error occurs; ``status`` then carries
	        the explanation (empty string for empty input).
	    """
	    if not text:
	        return None, ""

	    language = detect_language(text)
	    if language == "unknown":
	        return None, "Could not determine the language. Please use English or Arabic."

	    try:
	        tokens = tokenize_text(text, language)
	        if tokens:
	            video_path = generate_sign_video(tokens, language, output_format)
	            language_name = "English" if language == "en" else "Arabic"
	            outcome = (video_path, f'Translated {language_name}: "{text}" to sign language.')
	        else:
	            outcome = (None, "No translatable tokens found.")
	        return outcome
	    except Exception as e:
	        # Surface the failure in the UI status box as well as the log.
	        reason = str(e)
	        print(f"Error during translation: {reason}")
	        return None, f"Error during translation: {reason}"

	# Build the Gradio interface: inputs and status on the left, video on the right.
	with gr.Blocks(title=TITLE) as demo:
	    gr.Markdown(f"# {TITLE}")
	    gr.Markdown(DESCRIPTION)

	    with gr.Row():
	        with gr.Column():
	            # Text to translate and avatar style selector
	            text_input = gr.Textbox(
	                label="Text Input",
	                placeholder="Enter English or Arabic text here...",
	                lines=4,
	            )
	            format_dropdown = gr.Dropdown(
	                label="Avatar Style",
	                choices=["3D", "2D"],
	                value="3D",
	            )

	            with gr.Row():
	                clear_btn = gr.Button("Clear")
	                translate_btn = gr.Button("Translate to Sign Language", variant="primary")

	            # Read-only status line
	            status_output = gr.Textbox(label="Status", interactive=False)

	        with gr.Column():
	            # Generated sign language clip
	            video_output = gr.Video(
	                label="Sign Language Output",
	                format="mp4",
	                autoplay=True,
	                show_download_button=True,
	            )

	    # Clickable sample sentences, three English and three Arabic
	    gr.Examples(
	        examples=[
	            [sentence]
	            for sentence in (
	                "Hello, how can I help you today?",
	                "Please wait while I check your account.",
	                "Thank you for your patience.",
	                "مرحبا، كيف يمكنني مساعدتك اليوم؟",
	                "من فضلك انتظر بينما أتحقق من حسابك.",
	                "شكرا لصبرك.",
	            )
	        ],
	        inputs=[text_input],
	        outputs=[video_output, status_output],
	        fn=lambda text: translate_to_sign(text),
	    )

	    # Wire the buttons: translate fills video + status, clear resets text + status.
	    translate_btn.click(
	        fn=translate_to_sign,
	        inputs=[text_input, format_dropdown],
	        outputs=[video_output, status_output],
	    )
	    clear_btn.click(
	        fn=lambda: ("", "Input cleared"),
	        inputs=None,
	        outputs=[text_input, status_output],
	    )

	# Start the server only when executed as a script
	if __name__ == "__main__":
	    demo.launch()