walaa2022 committed on
Commit c2dfdef · verified · 1 Parent(s): d5cc7a1

Update app.py

Files changed (1)
  1. app.py +351 -282
app.py CHANGED
@@ -1,117 +1,163 @@
  import os
  import sys
  import gradio as gr
- import requests
- import json
- from datetime import datetime
  import tempfile
  import uuid
- import re

- # Install required packages if not already installed
  try:
-     import mediapipe as mp
-     import cv2
-     import numpy as np
  except ImportError:
-     print("Installing required packages...")
-     os.system("pip install mediapipe opencv-python numpy --quiet")
-     import mediapipe as mp
-     import cv2
-     import numpy as np

- TITLE = "Multilingual Sign Language Customer Assistant"
- DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
- The system automatically detects the input language and generates appropriate sign language visuals.

  **Features:**
- - Supports both English and Arabic text
- - Uses 3D avatar technology to generate sign language
- - Perfect for customer service and assistance scenarios
  """

  # Initialize MediaPipe
  mp_hands = mp.solutions.hands
  mp_drawing = mp.solutions.drawing_utils
  mp_drawing_styles = mp.solutions.drawing_styles
- mp_pose = mp.solutions.pose

- # Dictionary of translations for common customer service phrases
  TRANSLATIONS = {
      "hello": "مرحبا",
      "welcome": "أهلا وسهلا",
      "thank you": "شكرا",
      "help": "مساعدة",
      "yes": "نعم",
      "no": "لا",
-     "please": "من فضلك",
-     "wait": "انتظر",
-     "sorry": "آسف",
      "how can i help you": "كيف يمكنني مساعدتك",
      "customer": "عميل",
      "service": "خدمة",
-     "support": "دعم",
-     "information": "معلومات",
-     "question": "سؤال",
-     "answer": "إجابة",
  }

- # SignDict - dictionary of common signs in both languages
- # In a production app, these would link to pre-recorded videos or 3D animations
- SIGN_DICT = {
-     "en": {
-         "hello": "signs/en/hello.mp4",
-         "welcome": "signs/en/welcome.mp4",
-         "thank you": "signs/en/thank_you.mp4",
-         "help": "signs/en/help.mp4",
-         "yes": "signs/en/yes.mp4",
-         "no": "signs/en/no.mp4",
-         "please": "signs/en/please.mp4",
-         "wait": "signs/en/wait.mp4",
-         "sorry": "signs/en/sorry.mp4",
-         "how": "signs/en/how.mp4",
-         "what": "signs/en/what.mp4",
-         "where": "signs/en/where.mp4",
-         "when": "signs/en/when.mp4",
-         "who": "signs/en/who.mp4",
-         "why": "signs/en/why.mp4",
-         "customer": "signs/en/customer.mp4",
-         "service": "signs/en/service.mp4",
-         "support": "signs/en/support.mp4",
-         "information": "signs/en/information.mp4",
-         "question": "signs/en/question.mp4",
-         "answer": "signs/en/answer.mp4",
-     },
-     "ar": {
-         "مرحبا": "signs/ar/hello.mp4",
-         "أهلا وسهلا": "signs/ar/welcome.mp4",
-         "شكرا": "signs/ar/thank_you.mp4",
-         "مساعدة": "signs/ar/help.mp4",
-         "نعم": "signs/ar/yes.mp4",
-         "لا": "signs/ar/no.mp4",
-         "من فضلك": "signs/ar/please.mp4",
-         "انتظر": "signs/ar/wait.mp4",
-         "آسف": "signs/ar/sorry.mp4",
-         "كيف": "signs/ar/how.mp4",
-         "ماذا": "signs/ar/what.mp4",
-         "أين": "signs/ar/where.mp4",
-         "متى": "signs/ar/when.mp4",
-         "من": "signs/ar/who.mp4",
-         "لماذا": "signs/ar/why.mp4",
-         "عميل": "signs/ar/customer.mp4",
-         "خدمة": "signs/ar/service.mp4",
-         "دعم": "signs/ar/support.mp4",
-         "معلومات": "signs/ar/information.mp4",
-         "سؤال": "signs/ar/question.mp4",
-         "إجابة": "signs/ar/answer.mp4",
-     }
- }

  def detect_language(text):
-     """Detect if text is primarily English or Arabic"""
      if not text:
          return "unknown"
-
      # Simple detection by character set
      arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
      english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
@@ -126,230 +172,253 @@ def detect_language(text):
      else:
          return "unknown"

- def translate_text(text, source_lang, target_lang):
-     """Simple dictionary-based translation"""
-     if source_lang == target_lang:
-         return text
-
-     # Convert to lowercase for matching
-     text_lower = text.lower()
-
-     # For English to Arabic
-     if source_lang == "en" and target_lang == "ar":
-         for eng, ar in TRANSLATIONS.items():
-             text_lower = text_lower.replace(eng, ar)
-         return text_lower

-     # For Arabic to English
-     if source_lang == "ar" and target_lang == "en":
-         for eng, ar in TRANSLATIONS.items():
-             text_lower = text_lower.replace(ar, eng)
-         return text_lower

-     return text  # Return original if no translation path
-
- def tokenize_text(text, language):
-     """Split text into tokens that can be matched to signs"""
-     if language == "ar":
-         # Arabic tokenization
-         tokens = text.split()
-         # Check for phrases
-         phrases = []
-         i = 0
-         while i < len(tokens):
-             # Try to match longest phrases first
-             matched = False
-             for j in range(min(3, len(tokens) - i), 0, -1):
-                 phrase = " ".join(tokens[i:i+j])
-                 if phrase in SIGN_DICT[language]:
-                     phrases.append(phrase)
-                     i += j
-                     matched = True
-                     break
-             if not matched:
-                 phrases.append(tokens[i])
-                 i += 1
-         return phrases
      else:
-         # English tokenization
-         tokens = text.lower().split()
-         phrases = []
-         i = 0
-         while i < len(tokens):
-             matched = False
-             for j in range(min(3, len(tokens) - i), 0, -1):
-                 phrase = " ".join(tokens[i:i+j])
-                 if phrase in SIGN_DICT[language]:
-                     phrases.append(phrase)
-                     i += j
-                     matched = True
-                     break
-             if not matched:
-                 phrases.append(tokens[i])
-                 i += 1
-         return phrases

- def generate_default_sign_video(text, output_path, language="en"):
-     """Generate a simple video with the text when no sign is available"""
-     # Create a black frame with text
-     height, width = 480, 640
-     fps = 30
-     seconds = 2

-     # Create a VideoWriter object
-     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-     video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-     # Create frames with text
-     font = cv2.FONT_HERSHEY_SIMPLEX
-     font_scale = 1
-     font_color = (255, 255, 255)  # White
-     line_type = 2
-
-     # Text positioning
-     text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-     text_x = (width - text_size[0]) // 2
-     text_y = (height + text_size[1]) // 2

-     # Write frames
-     for _ in range(fps * seconds):
-         frame = np.zeros((height, width, 3), dtype=np.uint8)
-         cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
-         video.write(frame)

-     video.release()
-     return output_path

- def create_avatar_animation(text, output_path, language="en", style="3D"):
-     """Create a 3D avatar animation for the sign (simplified version)"""
-     width, height = 640, 480
-     fps = 30
-     duration = 3  # seconds
-
-     # Create video writer
-     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-     video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-     # Create a simple animation with hands
-     frames = fps * duration
-     for i in range(frames):
-         # Create a background based on style
-         if style == "3D":
-             # Create a gradient background
-             frame = np.zeros((height, width, 3), dtype=np.uint8)
-             for y in range(height):
-                 for x in range(width):
-                     frame[y, x] = [
-                         int(100 + 50 * (x / width)),
-                         int(60 + 30 * (y / height)),
-                         int(120 + 40 * ((x+y) / (width+height)))
-                     ]
-         else:
-             # Simple solid background for 2D
-             frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([240, 240, 240], dtype=np.uint8)

-         # Draw a simple avatar
-         if style == "3D":
-             # 3D-style avatar
-             # Body
-             cv2.rectangle(frame, (width//2-50, height//2-100), (width//2+50, height//2+100), (200, 200, 200), -1)
-             # Head
-             cv2.circle(frame, (width//2, height//2-150), 50, (200, 200, 200), -1)
-
-             # Animate hands based on frame number
-             t = i / frames
-             # Left hand movement
-             x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
-             y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
-             # Right hand movement
-             x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
-             y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))

-             # Draw hands
-             cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
-             cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
-         else:
-             # 2D-style signing
-             # Drawing a simplified 2D signer
-             cv2.line(frame, (width//2, height//2-100), (width//2, height//2+50), (0, 0, 0), 3)  # Body
-             cv2.circle(frame, (width//2, height//2-120), 20, (0, 0, 0), 2)  # Head

-             # Animated hands for signing
-             t = i / frames
-             angle1 = t * 2 * np.pi
-             angle2 = t * 2 * np.pi + np.pi/2

-             # Left arm
-             x1 = int(width//2)
-             y1 = int(height//2 - 70)
-             x2 = int(x1 - 60 * np.cos(angle1))
-             y2 = int(y1 + 60 * np.sin(angle1))
-             cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)

-             # Right arm
-             x3 = int(width//2)
-             y3 = int(height//2 - 70)
-             x4 = int(x3 + 60 * np.cos(angle2))
-             y4 = int(y3 + 60 * np.sin(angle2))
-             cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)
-
-         # Add text with current sign
-         font = cv2.FONT_HERSHEY_SIMPLEX
-         cv2.putText(frame, text, (width//2-100, height-50), font, 1, (0, 0, 0), 2)
-         if language == "ar":
-             # Right-to-left indicator
-             cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (0, 0, 0), 1)

-         video.write(frame)

-     video.release()
-     return output_path

- def generate_sign_video(tokens, language, output_format="3D"):
-     """Generate sign language video for the given tokens"""
-     # For each token, either find a pre-recorded video or generate one
      temp_dir = tempfile.gettempdir()
-     output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

-     # In a real implementation, this would concatenate actual sign videos
-     # For this demo, we'll create a simple animation
-     if language in SIGN_DICT and tokens and tokens[0] in SIGN_DICT[language]:
-         # In a real implementation, this would load the video file
-         # For demo purposes, we'll create an animation
-         create_avatar_animation(tokens[0], output_path, language, output_format)
-     else:
-         # Generate a default video with text
-         if tokens:
-             create_avatar_animation(tokens[0], output_path, language, output_format)
          else:
-             create_avatar_animation("No tokens", output_path, language, output_format)

-     return output_path

- def translate_to_sign(text, output_format="3D"):
      """Main function to translate text to sign language video"""
      if not text:
          return None, ""

      # Detect the input language
      language = detect_language(text)
      if language == "unknown":
          return None, "Could not determine the language. Please use English or Arabic."

      try:
-         # Tokenize the text
-         tokens = tokenize_text(text, language)
          if not tokens:
-             return None, "No translatable tokens found."

          # Generate sign language video
-         video_path = generate_sign_video(tokens, language, output_format)

          # Prepare status message
-         if language == "en":
-             status = f"Translated English: \"{text}\" to sign language."
-         else:
-             status = f"Translated Arabic: \"{text}\" to sign language."

          return video_path, status

@@ -372,10 +441,10 @@ with gr.Blocks(title=TITLE) as demo:
              label="Text Input"
          )

-         format_dropdown = gr.Dropdown(
-             choices=["3D", "2D"],
-             value="3D",
-             label="Avatar Style"
          )

          with gr.Row():
@@ -388,7 +457,7 @@ with gr.Blocks(title=TITLE) as demo:
      with gr.Column():
          # Output video
          video_output = gr.Video(
-             label="Sign Language Output",
              format="mp4",
              autoplay=True,
              show_download_button=True
@@ -397,14 +466,14 @@ with gr.Blocks(title=TITLE) as demo:
      # Examples in both languages
      gr.Examples(
          examples=[
-             ["Hello, how can I help you today?", "3D"],
-             ["Please wait while I check your account.", "3D"],
-             ["Thank you for your patience.", "3D"],
-             ["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
-             ["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
-             ["شكرا لصبرك.", "3D"]
          ],
-         inputs=[text_input, format_dropdown],
          outputs=[video_output, status_output],
          fn=translate_to_sign
      )
@@ -412,7 +481,7 @@ with gr.Blocks(title=TITLE) as demo:
      # Event handlers
      translate_btn.click(
          fn=translate_to_sign,
-         inputs=[text_input, format_dropdown],
          outputs=[video_output, status_output]
      )

 
  import os
  import sys
  import gradio as gr
  import tempfile
  import uuid
+ import json
+ import requests
+ from pathlib import Path
+ import cv2
+ import numpy as np
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
+ import mediapipe as mp
+ import time

+ # Try to import translation library - fall back to a local method if not available
  try:
+     from deep_translator import GoogleTranslator
+     translation_available = True
  except ImportError:
+     translation_available = False

+ # Define the title and description
+ TITLE = "Complete Sign Language Translation System"
+ DESCRIPTION = """This application translates English and Arabic text into sign language using real sign language videos.
+ It translates Arabic to English when needed, then maps the English text to sign language videos.

  **Features:**
+ - Supports both English and Arabic input
+ - Uses real sign videos from SignDict and other sources
+ - MediaPipe integration for gesture recognition and hand tracking
+ - Automatic language detection and translation
  """

  # Initialize MediaPipe
  mp_hands = mp.solutions.hands
  mp_drawing = mp.solutions.drawing_utils
  mp_drawing_styles = mp.solutions.drawing_styles
+ mp_holistic = mp.solutions.holistic

+ # Define paths for sign language videos
+ VIDEO_ROOT = "sign_videos"
+ os.makedirs(VIDEO_ROOT, exist_ok=True)
+ os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
+ os.makedirs(f"{VIDEO_ROOT}/ar", exist_ok=True)
+
+ # Define video URLs - use these to download videos on first run
+ # In a real application, you would have a more extensive database
+ SIGN_VIDEOS_URLS = {
+     "hello": "https://media.signbsl.com/videos/bsl/signstation/mp4/hello.mp4",
+     "thank": "https://media.signbsl.com/videos/bsl/signstation/mp4/thank_you.mp4",
+     "yes": "https://media.signbsl.com/videos/bsl/signstation/mp4/yes.mp4",
+     "no": "https://media.signbsl.com/videos/bsl/signstation/mp4/no.mp4",
+     "please": "https://media.signbsl.com/videos/bsl/signstation/mp4/please.mp4",
+     "help": "https://media.signbsl.com/videos/bsl/signstation/mp4/help.mp4",
+ }
+
+ # Define mapping of words to video files
+ SIGN_DICT = {
+     "en": {
+         "hello": f"{VIDEO_ROOT}/en/hello.mp4",
+         "hi": f"{VIDEO_ROOT}/en/hello.mp4",  # Map to same video
+         "welcome": f"{VIDEO_ROOT}/en/welcome.mp4",
+         "thank": f"{VIDEO_ROOT}/en/thank.mp4",
+         "you": f"{VIDEO_ROOT}/en/you.mp4",
+         "thanks": f"{VIDEO_ROOT}/en/thank.mp4",  # Map to same video
+         "please": f"{VIDEO_ROOT}/en/please.mp4",
+         "wait": f"{VIDEO_ROOT}/en/wait.mp4",
+         "help": f"{VIDEO_ROOT}/en/help.mp4",
+         "yes": f"{VIDEO_ROOT}/en/yes.mp4",
+         "no": f"{VIDEO_ROOT}/en/no.mp4",
+         "how": f"{VIDEO_ROOT}/en/how.mp4",
+         "can": f"{VIDEO_ROOT}/en/can.mp4",
+         "i": f"{VIDEO_ROOT}/en/i.mp4",
+         "service": f"{VIDEO_ROOT}/en/service.mp4",
+         "customer": f"{VIDEO_ROOT}/en/customer.mp4",
+         "sorry": f"{VIDEO_ROOT}/en/sorry.mp4",
+     }
+ }
+
+ # Create a dictionary for English to Arabic translations and vice versa
  TRANSLATIONS = {
      "hello": "مرحبا",
+     "hi": "مرحبا",
      "welcome": "أهلا وسهلا",
      "thank you": "شكرا",
+     "thanks": "شكرا",
+     "please": "من فضلك",
+     "wait": "انتظر",
      "help": "مساعدة",
      "yes": "نعم",
      "no": "لا",
      "how can i help you": "كيف يمكنني مساعدتك",
      "customer": "عميل",
      "service": "خدمة",
+     "support": "دعم",
+     "sorry": "آسف",
  }

+ # Function to download videos if they don't exist
+ def download_sign_videos():
+     """Download sign language videos on first run"""
+     for word, url in SIGN_VIDEOS_URLS.items():
+         output_path = f"{VIDEO_ROOT}/en/{word}.mp4"
+         if not os.path.exists(output_path):
+             try:
+                 print(f"Downloading {word} sign video...")
+                 response = requests.get(url)
+                 if response.status_code == 200:
+                     with open(output_path, 'wb') as f:
+                         f.write(response.content)
+                     print(f"Downloaded {word} sign video")
+                 else:
+                     print(f"Failed to download {word} sign video: {response.status_code}")
+             except Exception as e:
+                 print(f"Error downloading {word} sign video: {e}")
+
+ # Function to create placeholder videos when real ones don't exist yet
118
+ def create_placeholder_video(text, output_path, language="en"):
119
+ """Create a placeholder video with text when a real video isn't available"""
120
+ height, width = 480, 640
121
+ fps = 30
122
+ seconds = 1.5
123
+
124
+ fourcc = cv2.VideoWriter_fourcc(*'mp4v')
125
+ video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
126
+
127
+ font = cv2.FONT_HERSHEY_SIMPLEX
128
+ font_scale = 1
129
+ font_color = (255, 255, 255)
130
+ line_type = 2
131
+
132
+ # Text positioning
133
+ text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
134
+ text_x = (width - text_size[0]) // 2
135
+ text_y = (height + text_size[1]) // 2
136
+
137
+ for _ in range(int(fps * seconds)):
138
+ # Create a gradient background to look more professional
139
+ frame = np.zeros((height, width, 3), dtype=np.uint8)
140
+ for y in range(height):
141
+ color = int(50 + (y / height) * 100)
142
+ frame[y, :] = [color, color, color + 30]
143
+
144
+ # Add the word text
145
+ cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
146
+
147
+ # Add language indicator
148
+ lang_text = "English" if language == "en" else "Arabic"
149
+ cv2.putText(frame, lang_text, (width - 120, 30), font, 0.7, font_color, 1)
150
+
151
+ video.write(frame)
152
+
153
+ video.release()
154
+ return output_path
155
 
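The gradient background above is rebuilt row by row in Python on every frame, even though it never changes between frames. A small sketch (same dimensions and color ramp assumed) that builds it once with NumPy broadcasting, which the frame loop could then copy and draw text onto:

```python
import numpy as np

def make_gradient(height: int = 480, width: int = 640) -> np.ndarray:
    """Vertical gradient matching the per-row loop above, built once via broadcasting."""
    ramp = (50 + (np.arange(height) / height) * 100).astype(np.uint8)  # shape (H,)
    frame = np.empty((height, width, 3), dtype=np.uint8)
    frame[..., 0] = ramp[:, None]        # blue channel (OpenCV frames are BGR)
    frame[..., 1] = ramp[:, None]        # green channel
    frame[..., 2] = ramp[:, None] + 30   # red channel, shifted as in the original
    return frame
```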
  def detect_language(text):
+     """Detect if the text is primarily English or Arabic"""
      if not text:
          return "unknown"
+
      # Simple detection by character set
      arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
      english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')

      else:
          return "unknown"

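The middle of `detect_language` (between the character-set definitions and the final `else`) is collapsed in this diff view as unchanged lines. A plausible reconstruction based on the surrounding context (an assumption, not the committed code) counts characters from each set and compares:

```python
def detect_language(text):
    """Sketch of the elided logic: count characters from each set and compare."""
    if not text:
        return "unknown"
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    arabic_count = sum(1 for ch in text if ch in arabic_chars)
    english_count = sum(1 for ch in text if ch in english_chars)
    if arabic_count > english_count:
        return "ar"
    elif english_count > arabic_count:
        return "en"
    else:
        return "unknown"
```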
+ def translate_arabic_to_english(text):
+     """Translate Arabic text to English"""
+     if not text:
+         return "", "No text to translate"
+
+     # Check for direct translations in our dictionary
+     for eng, ar in TRANSLATIONS.items():
+         if ar in text:
+             text = text.replace(ar, eng)
+
+     # Try to use deep_translator if available
+     if translation_available:
+         try:
+             translator = GoogleTranslator(source='ar', target='en')
+             translation = translator.translate(text)
+             return translation, f"Translated to English: {translation}"
+         except Exception as e:
+             print(f"Translation error: {e}")
+             return text, f"Error during translation: {e}"
      else:
+         # Fallback method - very basic word mapping
+         result = text
+         for en, ar in TRANSLATIONS.items():
+             result = result.replace(ar, en)
+         return result, "Used basic translation mapping"

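One design point worth noting: the dictionary pass rewrites known Arabic phrases to English *before* the text reaches `GoogleTranslator(source='ar', ...)`, so the online translator may receive mixed Arabic/English input; sending the original text would be the safer order. A hypothetical call illustrating the dictionary pass:

```python
# Hypothetical usage; with deep_translator installed, the online path runs afterwards.
english, status = translate_arabic_to_english("من فضلك انتظر")
# The dictionary pass alone already yields "please wait" here, since both
# "من فضلك" ("please") and "انتظر" ("wait") appear in TRANSLATIONS.
print(english, "|", status)
```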
+ def tokenize_text(text, language="en"):
+     """Split the text into tokens that can be matched to sign videos"""
+     # Convert to lowercase for English
+     if language == "en":
+         text = text.lower()
+
+     # Simple tokenization by splitting on spaces
+     tokens = text.split()
+
+     # Try to match multi-word phrases first (like "thank you")
+     result = []
+     i = 0
+     while i < len(tokens):
+         # Try 3-word phrases, then 2-word, then single words
+         matched = False
+         for j in range(min(3, len(tokens) - i), 0, -1):
+             phrase = " ".join(tokens[i:i+j])
+             if language in SIGN_DICT and phrase in SIGN_DICT[language]:
+                 result.append(phrase)
+                 i += j
+                 matched = True
+                 break
+
+         # If no match found, add the single token
+         if not matched:
+             result.append(tokens[i])
+             i += 1
+
+     return result

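Because `tokenize_text` splits on spaces only, punctuation stays attached to words: "Hello, how can I help you?" produces the token "hello,", which misses the "hello" entry in SIGN_DICT and falls through to a placeholder video. A small pre-cleaning step (a suggested refinement, not in this commit) fixes that:

```python
import string

def clean_tokens(text):
    """Lowercase, strip surrounding punctuation from each token, drop empties."""
    stripped = (tok.strip(string.punctuation) for tok in text.lower().split())
    return [tok for tok in stripped if tok]

# "hello," now matches the dictionary key "hello"
print(clean_tokens("Hello, how can I help you?"))
# ['hello', 'how', 'can', 'i', 'help', 'you']
```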
+ def analyze_sign_video(video_path):
+     """Use MediaPipe to analyze hand movements in a sign language video"""
+     try:
+         # Only process if the file exists
+         if not os.path.exists(video_path):
+             return None, "Video file not found"
+
+         cap = cv2.VideoCapture(video_path)
+         if not cap.isOpened():
+             return None, "Could not open video file"
+
+         # Get video properties
+         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+         # Initialize MediaPipe Holistic (includes hands, face, and pose)
+         with mp_holistic.Holistic(
+             min_detection_confidence=0.5,
+             min_tracking_confidence=0.5) as holistic:
+
+             # Output video with annotations
+             output_path = video_path.replace(".mp4", "_analyzed.mp4")
+             fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+             out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+             frame_count = 0
+             while cap.isOpened():
+                 success, image = cap.read()
+                 if not success:
+                     break
+
+                 # Convert image to RGB and process with MediaPipe
+                 image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+                 results = holistic.process(image_rgb)
+
+                 # Draw landmarks
+                 annotated_image = image.copy()
+
+                 # Draw face landmarks
+                 if results.face_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.face_landmarks,
+                         mp_holistic.FACEMESH_CONTOURS,
+                         landmark_drawing_spec=None,
+                         connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
+
+                 # Draw pose landmarks
+                 if results.pose_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.pose_landmarks,
+                         mp_holistic.POSE_CONNECTIONS,
+                         landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
+
+                 # Draw hand landmarks
+                 if results.left_hand_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.left_hand_landmarks,
+                         mp_hands.HAND_CONNECTIONS,
+                         mp_drawing_styles.get_default_hand_landmarks_style(),
+                         mp_drawing_styles.get_default_hand_connections_style())
+
+                 if results.right_hand_landmarks:
+                     mp_drawing.draw_landmarks(
+                         annotated_image,
+                         results.right_hand_landmarks,
+                         mp_hands.HAND_CONNECTIONS,
+                         mp_drawing_styles.get_default_hand_landmarks_style(),
+                         mp_drawing_styles.get_default_hand_connections_style())
+
+                 # Add progress indicator
+                 progress = frame_count / total_frames * 100
+                 cv2.putText(annotated_image, f"Processing: {progress:.1f}%",
+                             (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+                 # Write the frame to output video
+                 out.write(annotated_image)
+                 frame_count += 1
+
+         cap.release()
+         out.release()
+
+         return output_path, f"Analyzed video with MediaPipe. Processed {frame_count} frames."
+
+     except Exception as e:
+         print(f"Error analyzing video: {e}")
+         return None, f"Error analyzing video: {e}"

+ def create_sign_video(tokens, language="en", analyze=False):
+     """Create a sign language video by concatenating clips for each token"""
+     # Temporary directory for video processing
      temp_dir = tempfile.gettempdir()

+     # List to store video paths for each token
+     video_paths = []
+
+     # For each token, find or create a video
+     for token in tokens:
+         # Check if we have a real video for this token
+         if language in SIGN_DICT and token in SIGN_DICT[language]:
+             video_path = SIGN_DICT[language][token]
+             # If the video file doesn't exist, create a placeholder
+             if not os.path.exists(video_path):
+                 placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+                 create_placeholder_video(token, placeholder_path, language)
+                 video_paths.append(placeholder_path)
+             else:
+                 video_paths.append(video_path)
          else:
+             # Create a placeholder video for this token
+             placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
+             create_placeholder_video(token, placeholder_path, language)
+             video_paths.append(placeholder_path)

+     # If no videos were created, return None
+     if not video_paths:
+         return None, "No videos were created"
+
+     # Concatenate all videos
+     output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
+
+     try:
+         # Using moviepy to concatenate videos
+         clips = [VideoFileClip(vp) for vp in video_paths if os.path.exists(vp)]
+         if clips:
+             final_clip = concatenate_videoclips(clips)
+             final_clip.write_videofile(output_path, codec="libx264", audio=False)
+
+             # Clean up the temporary clips
+             for clip in clips:
+                 clip.close()
+
+             # If analyze is True, use MediaPipe to analyze the video
+             if analyze:
+                 analyzed_path, analysis_msg = analyze_sign_video(output_path)
+                 if analyzed_path:
+                     return analyzed_path, analysis_msg
+
+             return output_path, f"Created sign language video with {len(clips)} clips"
+         else:
+             return None, "No valid video clips were found"
+     except Exception as e:
+         print(f"Error concatenating videos: {str(e)}")
+         # Fallback: return the first video if concatenation fails
+         if video_paths and os.path.exists(video_paths[0]):
+             return video_paths[0], "Failed to concatenate videos, returning single clip"
+         return None, f"Error creating video: {str(e)}"

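Since the concatenated list can mix downloaded sign clips with generated 640x480 placeholders, the clips may not share one resolution, and `concatenate_videoclips` defaults to `method="chain"`, which assumes they do. A sketch of the safer call (paths are placeholders for illustration):

```python
from moviepy.editor import VideoFileClip, concatenate_videoclips

# Hypothetical inputs; a real clip and a generated placeholder may differ in size.
clips = [VideoFileClip(p) for p in ["sign_videos/en/hello.mp4",
                                    "/tmp/help_placeholder.mp4"]]

# method="compose" centers each clip on a canvas sized to the largest clip,
# instead of assuming every clip shares one resolution (the "chain" default).
final = concatenate_videoclips(clips, method="compose")
final.write_videofile("combined.mp4", codec="libx264", audio=False)
```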
+ def translate_to_sign(text, analyze_video=False):
      """Main function to translate text to sign language video"""
      if not text:
          return None, ""

+     # Download videos if needed (first run)
+     download_sign_videos()
+
      # Detect the input language
      language = detect_language(text)
      if language == "unknown":
          return None, "Could not determine the language. Please use English or Arabic."

      try:
+         # If Arabic, translate to English first
+         if language == "ar":
+             english_text, translation_status = translate_arabic_to_english(text)
+             # Use English dictionary for sign videos
+             tokens = tokenize_text(english_text, "en")
+             language_for_signs = "en"
+
+             translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
+         else:
+             # Use English text directly
+             tokens = tokenize_text(text, "en")
+             language_for_signs = "en"
+             translation_info = ""
+
          if not tokens:
+             return None, translation_info + "No translatable tokens found."

          # Generate sign language video
+         video_path, video_status = create_sign_video(tokens, language_for_signs, analyze_video)
+
+         if not video_path:
+             return None, translation_info + "Failed to create sign language video."

          # Prepare status message
+         status = translation_info + video_status

          return video_path, status

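Taken together, the new pipeline is: detect language, translate Arabic to English if needed, tokenize, fetch or synthesize a clip per token, concatenate, and optionally annotate with MediaPipe. A minimal usage sketch outside the Gradio UI, assuming the functions above are importable:

```python
# Hypothetical standalone call; the Gradio button wires the same function.
video_path, status = translate_to_sign("Thank you for your patience.",
                                       analyze_video=False)
print(status)       # e.g. "Created sign language video with N clips"
print(video_path)   # path to the concatenated MP4 in the temp directory
```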
              label="Text Input"
          )

+         analyze_checkbox = gr.Checkbox(
+             label="Analyze with MediaPipe",
+             value=False,
+             info="Apply MediaPipe hand tracking to visualize sign gestures"
          )

          with gr.Row():

      with gr.Column():
          # Output video
          video_output = gr.Video(
+             label="Sign Language Video",
              format="mp4",
              autoplay=True,
              show_download_button=True

      # Examples in both languages
      gr.Examples(
          examples=[
+             ["Hello, how can I help you?", False],
+             ["Thank you for your patience.", False],
+             ["Yes, please wait.", True],  # With analysis
+             ["مرحبا", False],
+             ["شكرا", False],
+             ["نعم، من فضلك انتظر", True],  # With analysis
          ],
+         inputs=[text_input, analyze_checkbox],
          outputs=[video_output, status_output],
          fn=translate_to_sign
      )

      # Event handlers
      translate_btn.click(
          fn=translate_to_sign,
+         inputs=[text_input, analyze_checkbox],
          outputs=[video_output, status_output]
      )