import os
import sys
import subprocess
import gradio as gr
import requests
import json
from datetime import datetime
import tempfile
import uuid
import re

# Install required packages if not already installed.
# NOTE(review): installing at import time is fragile; these should live in
# requirements.txt.  subprocess.run with an argument list avoids building a
# shell string and targets the interpreter actually running this script.
try:
    import mediapipe as mp
    import cv2
    import numpy as np
except ImportError:
    print("Installing required packages...")
    subprocess.run(
        [sys.executable, "-m", "pip", "install",
         "mediapipe", "opencv-python", "numpy", "--quiet"],
        check=False,
    )
    import mediapipe as mp
    import cv2
    import numpy as np

TITLE = "Multilingual Sign Language Customer Assistant"
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
The system automatically detects the input language and generates appropriate sign language visuals.

**Features:**
- Supports both English and Arabic text
- Uses 3D avatar technology to generate sign language
- Perfect for customer service and assistance scenarios
"""

# Initialize MediaPipe
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_pose = mp.solutions.pose

# Dictionary of translations for common customer service phrases
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "help": "مساعدة",
    "yes": "نعم",
    "no": "لا",
    "please": "من فضلك",
    "wait": "انتظر",
    "sorry": "آسف",
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "support": "دعم",
    "information": "معلومات",
    "question": "سؤال",
    "answer": "إجابة",
}

# SignDict - dictionary of common signs in both languages.
# In a production app, these would link to pre-recorded videos or 3D animations.
SIGN_DICT = {
    "en": {
        "hello": "signs/en/hello.mp4",
        "welcome": "signs/en/welcome.mp4",
        "thank you": "signs/en/thank_you.mp4",
        "help": "signs/en/help.mp4",
        "yes": "signs/en/yes.mp4",
        "no": "signs/en/no.mp4",
        "please": "signs/en/please.mp4",
        "wait": "signs/en/wait.mp4",
        "sorry": "signs/en/sorry.mp4",
        "how": "signs/en/how.mp4",
        "what": "signs/en/what.mp4",
        "where": "signs/en/where.mp4",
        "when": "signs/en/when.mp4",
        "who": "signs/en/who.mp4",
        "why": "signs/en/why.mp4",
        "customer": "signs/en/customer.mp4",
        "service": "signs/en/service.mp4",
        "support": "signs/en/support.mp4",
        "information": "signs/en/information.mp4",
        "question": "signs/en/question.mp4",
        "answer": "signs/en/answer.mp4",
    },
    "ar": {
        "مرحبا": "signs/ar/hello.mp4",
        "أهلا وسهلا": "signs/ar/welcome.mp4",
        "شكرا": "signs/ar/thank_you.mp4",
        "مساعدة": "signs/ar/help.mp4",
        "نعم": "signs/ar/yes.mp4",
        "لا": "signs/ar/no.mp4",
        "من فضلك": "signs/ar/please.mp4",
        "انتظر": "signs/ar/wait.mp4",
        "آسف": "signs/ar/sorry.mp4",
        "كيف": "signs/ar/how.mp4",
        "ماذا": "signs/ar/what.mp4",
        "أين": "signs/ar/where.mp4",
        "متى": "signs/ar/when.mp4",
        "من": "signs/ar/who.mp4",
        "لماذا": "signs/ar/why.mp4",
        "عميل": "signs/ar/customer.mp4",
        "خدمة": "signs/ar/service.mp4",
        "دعم": "signs/ar/support.mp4",
        "معلومات": "signs/ar/information.mp4",
        "سؤال": "signs/ar/question.mp4",
        "إجابة": "signs/ar/answer.mp4",
    }
}


def detect_language(text):
    """Detect whether *text* is primarily English or Arabic.

    Counts characters belonging to each script; returns "ar" when Arabic
    letters dominate, "en" when any Latin letters are present otherwise,
    and "unknown" for empty input or text with neither script.
    """
    if not text:
        return "unknown"

    # Simple detection by character-set membership.
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)

    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"


def translate_text(text, source_lang, target_lang):
    """Simple dictionary-based translation between English and Arabic.

    Fix over the original: raw ``str.replace`` matched substrings, so "no"
    corrupted "now"/"note", and dict order let "help" clobber the longer
    phrase "how can i help you".  Matching now uses whole-word boundaries
    and tries the longest source phrase first.
    Returns *text* unchanged when no translation path exists.
    """
    if source_lang == target_lang:
        return text

    # Convert to lowercase for matching (dictionary keys are lowercase).
    text_lower = text.lower()

    if source_lang == "en" and target_lang == "ar":
        # Longest English phrase first, whole words only.
        for eng, ar in sorted(TRANSLATIONS.items(),
                              key=lambda kv: len(kv[0]), reverse=True):
            text_lower = re.sub(r"\b" + re.escape(eng) + r"\b", ar, text_lower)
        return text_lower

    if source_lang == "ar" and target_lang == "en":
        # Longest Arabic phrase first, whole words only.
        for eng, ar in sorted(TRANSLATIONS.items(),
                              key=lambda kv: len(kv[1]), reverse=True):
            text_lower = re.sub(r"\b" + re.escape(ar) + r"\b", eng, text_lower)
        return text_lower

    return text  # Return original if no translation path
def tokenize_text(text, language):
    """Split *text* into tokens greedily matched against SIGN_DICT phrases.

    At each position the longest dictionary phrase (up to 3 words) wins;
    when nothing matches, the single word passes through unchanged.  The
    original had two byte-identical branches for "ar" and "en" differing
    only in lowercasing; they are merged here.
    """
    # English sign keys are stored lowercase; Arabic has no letter case.
    tokens = text.split() if language == "ar" else text.lower().split()
    sign_vocab = SIGN_DICT.get(language, {})

    phrases = []
    i = 0
    while i < len(tokens):
        # Try to match longest phrases first.
        for j in range(min(3, len(tokens) - i), 0, -1):
            phrase = " ".join(tokens[i:i + j])
            if phrase in sign_vocab:
                phrases.append(phrase)
                i += j
                break
        else:
            # No dictionary phrase starts here: emit the bare word.
            phrases.append(tokens[i])
            i += 1
    return phrases


def generate_default_sign_video(text, output_path, language="en"):
    """Write a 2-second fallback video showing *text* centered on black.

    Used when no sign clip is available.  Returns *output_path*.
    """
    height, width = 480, 640
    fps = 30
    seconds = 2

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    font_color = (255, 255, 255)  # White
    line_type = 2

    # Center the text on the frame.
    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2

    for _ in range(fps * seconds):
        frame = np.zeros((height, width, 3), dtype=np.uint8)
        cv2.putText(frame, text, (text_x, text_y), font,
                    font_scale, font_color, line_type)
        video.write(frame)

    video.release()
    return output_path


def create_avatar_animation(text, output_path, language="en", style="3D"):
    """Render a simple placeholder signing animation to *output_path*.

    Draws a blob ("3D") or stick-figure ("2D") avatar whose hands move on
    sine paths, captioned with *text*; Arabic input gets an "RTL" marker.
    Returns *output_path*.

    Performance fix: the background is frame-invariant, so it is rendered
    once with vectorized numpy instead of the original per-pixel Python
    loop repeated for every frame (~27 million iterations per call).
    """
    width, height = 640, 480
    fps = 30
    duration = 3  # seconds

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    frames = fps * duration
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Build the background once; identical pixel values to the original.
    if style == "3D":
        xv, yv = np.meshgrid(
            np.arange(width, dtype=np.float32),
            np.arange(height, dtype=np.float32),
        )
        background = np.stack(
            [
                100 + 50 * (xv / width),
                60 + 30 * (yv / height),
                120 + 40 * ((xv + yv) / (width + height)),
            ],
            axis=-1,
        ).astype(np.uint8)
    else:
        # Simple solid light-gray background for 2D.
        background = np.full((height, width, 3), 240, dtype=np.uint8)

    for i in range(frames):
        frame = background.copy()
        t = i / frames  # normalized animation phase in [0, 1)

        if style == "3D":
            # 3D-style avatar: body and head.
            cv2.rectangle(frame, (width//2 - 50, height//2 - 100),
                          (width//2 + 50, height//2 + 100), (200, 200, 200), -1)
            cv2.circle(frame, (width//2, height//2 - 150), 50, (200, 200, 200), -1)

            # Hands orbit symmetrically about the torso.
            x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
            y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
            y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
            cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
            cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
        else:
            # 2D-style signing: simplified stick-figure signer.
            cv2.line(frame, (width//2, height//2 - 100),
                     (width//2, height//2 + 50), (0, 0, 0), 3)          # Body
            cv2.circle(frame, (width//2, height//2 - 120), 20, (0, 0, 0), 2)  # Head

            angle1 = t * 2 * np.pi
            angle2 = t * 2 * np.pi + np.pi / 2

            # Left arm
            x1, y1 = width//2, height//2 - 70
            x2 = int(x1 - 60 * np.cos(angle1))
            y2 = int(y1 + 60 * np.sin(angle1))
            cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)

            # Right arm
            x3, y3 = width//2, height//2 - 70
            x4 = int(x3 + 60 * np.cos(angle2))
            y4 = int(y3 + 60 * np.sin(angle2))
            cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)

        # Caption with the sign currently being animated.
        cv2.putText(frame, text, (width//2 - 100, height - 50),
                    font, 1, (0, 0, 0), 2)
        if language == "ar":
            # Right-to-left indicator
            cv2.putText(frame, "RTL", (width - 70, 30), font, 0.7, (0, 0, 0), 1)

        video.write(frame)

    video.release()
    return output_path


def generate_sign_video(tokens, language, output_format="3D"):
    """Generate a sign language video for *tokens*; return the file path.

    Demo limitation: only the first token is animated.  A production
    implementation would look up each token in SIGN_DICT and concatenate
    the pre-recorded clips.  The original had three branches that all
    called create_avatar_animation identically; collapsed here.
    """
    temp_dir = tempfile.gettempdir()
    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

    label = tokens[0] if tokens else "No tokens"
    create_avatar_animation(label, output_path, language, output_format)
    return output_path


def translate_to_sign(text, output_format="3D"):
    """Main entry point: translate *text* into a sign-language video.

    Returns ``(video_path, status_message)``; *video_path* is None on
    empty input, undetectable language, or an internal error.
    """
    if not text:
        return None, ""

    # Detect the input language.
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."

    try:
        tokens = tokenize_text(text, language)
        if not tokens:
            return None, "No translatable tokens found."

        video_path = generate_sign_video(tokens, language, output_format)

        lang_name = "English" if language == "en" else "Arabic"
        status = f"Translated {lang_name}: \"{text}\" to sign language."
        return video_path, status
    except Exception as e:
        # Surface the failure in the UI status box instead of crashing.
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"


# ---------------------------------------------------------------------------
# Gradio interface
# ---------------------------------------------------------------------------
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            format_dropdown = gr.Dropdown(
                choices=["3D", "2D"],
                value="3D",
                label="Avatar Style"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language",
                                          variant="primary")
            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
            # Output video
            video_output = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True,
                show_download_button=True
            )

    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you today?", "3D"],
            ["Please wait while I check your account.", "3D"],
            ["Thank you for your patience.", "3D"],
            ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
            ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
            ["شكرا لصبرك.", "3D"]
        ],
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output],
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input, format_dropdown],
        outputs=[video_output, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()