Update app.py
app.py CHANGED

Old version (context plus removed lines; removals are prefixed with "-"):

@@ -1,89 +1,76 @@
import os
import sys
-import
import tempfile
import uuid
-import json
import requests
from pathlib import Path
-import cv2
-import numpy as np
-from moviepy.editor import VideoFileClip, concatenate_videoclips
-import mediapipe as mp
-import time

-#
try:
-
-    translation_available = True
except ImportError:
-

# Define the title and description
-TITLE = "
-DESCRIPTION = """This application translates English and Arabic text into sign language using
-It translates Arabic to English when needed, then maps the English text to sign language

**Features:**
- Supports both English and Arabic input
-- Uses
--
-- Automatic language detection and translation
"""

-# Initialize MediaPipe
-mp_hands = mp.solutions.hands
-mp_drawing = mp.solutions.drawing_utils
-mp_drawing_styles = mp.solutions.drawing_styles
-mp_holistic = mp.solutions.holistic
-
# Define paths for sign language videos
VIDEO_ROOT = "sign_videos"
os.makedirs(VIDEO_ROOT, exist_ok=True)
os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
-os.makedirs(f"{VIDEO_ROOT}/ar", exist_ok=True)
-
-# Define video URLs - use these to download videos on first run
-# In a real application, you would have a more extensive database
-SIGN_VIDEOS_URLS = {
-    "hello": "https://media.signbsl.com/videos/bsl/signstation/mp4/hello.mp4",
-    "thank": "https://media.signbsl.com/videos/bsl/signstation/mp4/thank_you.mp4",
-    "yes": "https://media.signbsl.com/videos/bsl/signstation/mp4/yes.mp4",
-    "no": "https://media.signbsl.com/videos/bsl/signstation/mp4/no.mp4",
-    "please": "https://media.signbsl.com/videos/bsl/signstation/mp4/please.mp4",
-    "help": "https://media.signbsl.com/videos/bsl/signstation/mp4/help.mp4",
-}

# Define mapping of words to video files
SIGN_DICT = {
    "en": {
        "hello": f"{VIDEO_ROOT}/en/hello.mp4",
-        "hi": f"{VIDEO_ROOT}/en/hello.mp4",  # Map to same video
-        "welcome": f"{VIDEO_ROOT}/en/welcome.mp4",
        "thank": f"{VIDEO_ROOT}/en/thank.mp4",
        "you": f"{VIDEO_ROOT}/en/you.mp4",
-        "thanks": f"{VIDEO_ROOT}/en/thank.mp4",  # Map to same video
        "please": f"{VIDEO_ROOT}/en/please.mp4",
        "wait": f"{VIDEO_ROOT}/en/wait.mp4",
        "help": f"{VIDEO_ROOT}/en/help.mp4",
        "yes": f"{VIDEO_ROOT}/en/yes.mp4",
        "no": f"{VIDEO_ROOT}/en/no.mp4",
-        "how": f"{VIDEO_ROOT}/en/how.mp4",
-        "can": f"{VIDEO_ROOT}/en/can.mp4",
-        "i": f"{VIDEO_ROOT}/en/i.mp4",
-        "service": f"{VIDEO_ROOT}/en/service.mp4",
-        "customer": f"{VIDEO_ROOT}/en/customer.mp4",
-        "sorry": f"{VIDEO_ROOT}/en/sorry.mp4",
    }
}

# Create a dictionary for English to Arabic translations and vice versa
TRANSLATIONS = {
    "hello": "مرحبا",
-    "hi": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
-    "thanks": "شكرا",
    "please": "من فضلك",
    "wait": "انتظر",
    "help": "مساعدة",
@@ -92,67 +79,9 @@ TRANSLATIONS = {
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
-    "support": "دعم",
    "sorry": "آسف",
}

-# Function to download videos if they don't exist
-def download_sign_videos():
-    """Download sign language videos on first run"""
-    for word, url in SIGN_VIDEOS_URLS.items():
-        output_path = f"{VIDEO_ROOT}/en/{word}.mp4"
-        if not os.path.exists(output_path):
-            try:
-                print(f"Downloading {word} sign video...")
-                response = requests.get(url)
-                if response.status_code == 200:
-                    with open(output_path, 'wb') as f:
-                        f.write(response.content)
-                    print(f"Downloaded {word} sign video")
-                else:
-                    print(f"Failed to download {word} sign video: {response.status_code}")
-            except Exception as e:
-                print(f"Error downloading {word} sign video: {e}")
-
-# Function to create placeholder videos when real ones don't exist yet
-def create_placeholder_video(text, output_path, language="en"):
-    """Create a placeholder video with text when a real video isn't available"""
-    height, width = 480, 640
-    fps = 30
-    seconds = 1.5
-
-    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-    font = cv2.FONT_HERSHEY_SIMPLEX
-    font_scale = 1
-    font_color = (255, 255, 255)
-    line_type = 2
-
-    # Text positioning
-    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
-    text_x = (width - text_size[0]) // 2
-    text_y = (height + text_size[1]) // 2
-
-    for _ in range(int(fps * seconds)):
-        # Create a gradient background to look more professional
-        frame = np.zeros((height, width, 3), dtype=np.uint8)
-        for y in range(height):
-            color = int(50 + (y / height) * 100)
-            frame[y, :] = [color, color, color + 30]
-
-        # Add the word text
-        cv2.putText(frame, text, (text_x, text_y), font, font_scale, font_color, line_type)
-
-        # Add language indicator
-        lang_text = "English" if language == "en" else "Arabic"
-        cv2.putText(frame, lang_text, (width - 120, 30), font, 0.7, font_color, 1)
-
-        video.write(frame)
-
-    video.release()
-    return output_path
-
def detect_language(text):
    """Detect if the text is primarily English or Arabic"""
    if not text:
@@ -173,221 +102,76 @@ def detect_language(text):
    return "unknown"

def translate_arabic_to_english(text):
-    """Translate Arabic text to English"""
    if not text:
        return "", "No text to translate"

-    #
-
-
-

-
-    if translation_available:
-        try:
-            translator = GoogleTranslator(source='ar', target='en')
-            translation = translator.translate(text)
-            return translation, f"Translated to English: {translation}"
-        except Exception as e:
-            print(f"Translation error: {e}")
-            return text, f"Error during translation: {e}"
-    else:
-        # Fallback method - very basic word mapping
-        result = text
-        for en, ar in TRANSLATIONS.items():
-            result = result.replace(ar, en)
-        return result, "Used basic translation mapping"

-def tokenize_text(text
-    """Split the text into tokens
    # Convert to lowercase for English
-
-        text = text.lower()

    # Simple tokenization by splitting on spaces
-
-
-    # Try to match multi-word phrases first (like "thank you")
-    result = []
-    i = 0
-    while i < len(tokens):
-        # Try 3-word phrases, then 2-word, then single words
-        matched = False
-        for j in range(min(3, len(tokens) - i), 0, -1):
-            phrase = " ".join(tokens[i:i+j])
-            if language in SIGN_DICT and phrase in SIGN_DICT[language]:
-                result.append(phrase)
-                i += j
-                matched = True
-                break
-
-        # If no match found, add the single token
-        if not matched:
-            result.append(tokens[i])
-            i += 1
-
-    return result

-def analyze_sign_video(video_path):
-    """
-
-    #
-
-
-
-    cap = cv2.VideoCapture(video_path)
-    if not cap.isOpened():
-        return None, "Could not open video file"
-
-    # Get video properties
-    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    # Initialize MediaPipe Holistic (includes hands, face, and pose)
-    with mp_holistic.Holistic(
-        min_detection_confidence=0.5,
-        min_tracking_confidence=0.5) as holistic:
-
-        # Output video with annotations
-        output_path = video_path.replace(".mp4", "_analyzed.mp4")
-        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
-
-        frame_count = 0
-        while cap.isOpened():
-            success, image = cap.read()
-            if not success:
-                break
-
-            # Convert image to RGB and process with MediaPipe
-            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-            results = holistic.process(image_rgb)
-
-            # Draw landmarks
-            annotated_image = image.copy()
-
-            # Draw face landmarks
-            if results.face_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.face_landmarks,
-                    mp_holistic.FACEMESH_CONTOURS,
-                    landmark_drawing_spec=None,
-                    connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style())
-
-            # Draw pose landmarks
-            if results.pose_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.pose_landmarks,
-                    mp_holistic.POSE_CONNECTIONS,
-                    landmark_drawing_spec=mp_drawing_styles.get_default_pose_landmarks_style())
-
-            # Draw hand landmarks
-            if results.left_hand_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.left_hand_landmarks,
-                    mp_hands.HAND_CONNECTIONS,
-                    mp_drawing_styles.get_default_hand_landmarks_style(),
-                    mp_drawing_styles.get_default_hand_connections_style())
-
-            if results.right_hand_landmarks:
-                mp_drawing.draw_landmarks(
-                    annotated_image,
-                    results.right_hand_landmarks,
-                    mp_hands.HAND_CONNECTIONS,
-                    mp_drawing_styles.get_default_hand_landmarks_style(),
-                    mp_drawing_styles.get_default_hand_connections_style())
-
-            # Add progress indicator
-            progress = frame_count / total_frames * 100
-            cv2.putText(annotated_image, f"Processing: {progress:.1f}%",
-                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
-
-            # Write the frame to output video
-            out.write(annotated_image)
-            frame_count += 1
-
-        cap.release()
-        out.release()
-
-        return output_path, f"Analyzed video with MediaPipe. Processed {frame_count} frames."

-
-
-
-
-def create_sign_video(tokens, language="en", analyze=False):
-    """Create a sign language video by concatenating clips for each token"""
-    # Temporary directory for video processing
-    temp_dir = tempfile.gettempdir()

-
-

-
-
-
-
-                video_path = SIGN_DICT[language][token]
-                # If the video file doesn't exist, create a placeholder
-                if not os.path.exists(video_path):
-                    placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
-                    create_placeholder_video(token, placeholder_path, language)
-                    video_paths.append(placeholder_path)
-                else:
-                    video_paths.append(video_path)
-            else:
-                # Create a placeholder video for this token
-                placeholder_path = os.path.join(temp_dir, f"{token}_{uuid.uuid4()}.mp4")
-                create_placeholder_video(token, placeholder_path, language)
-                video_paths.append(placeholder_path)

-    #
-
-

-
-

-
-
-        clips = [VideoFileClip(vp) for vp in video_paths if os.path.exists(vp)]
-        if clips:
-            final_clip = concatenate_videoclips(clips)
-            final_clip.write_videofile(output_path, codec="libx264", audio=False)
-
-            # Clean up the temporary clips
-            for clip in clips:
-                clip.close()
-
-            # If analyze is True, use MediaPipe to analyze the video
-            if analyze:
-                analyzed_path, analysis_msg = analyze_sign_video(output_path)
-                if analyzed_path:
-                    return analyzed_path, analysis_msg
-
-            return output_path, f"Created sign language video with {len(clips)} clips"
-        else:
-            return None, "No valid video clips were found"
-    except Exception as e:
-        print(f"Error concatenating videos: {str(e)}")
-        # Fallback: return the first video if concatenation fails
-        if video_paths and os.path.exists(video_paths[0]):
-            return video_paths[0], "Failed to concatenate videos, returning single clip"
-        return None, f"Error creating video: {str(e)}"

-def translate_to_sign(text, analyze_video=False):
-    """Main function to translate text to sign language
    if not text:
        return None, ""

-    # Download videos if needed (first run)
-    download_sign_videos()
-
    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
@@ -397,28 +181,30 @@ def translate_to_sign(text, analyze_video=False):
    # If Arabic, translate to English first
    if language == "ar":
        english_text, translation_status = translate_arabic_to_english(text)
-
-
-        language_for_signs = "en"
-
-        translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
    else:
-
-        tokens = tokenize_text(text, "en")
-        language_for_signs = "en"
        translation_info = ""

    if not tokens:
        return None, translation_info + "No translatable tokens found."

-    #
-

-
-

    # Prepare status message
    status = translation_info + video_status

    return video_path, status

@@ -441,12 +227,6 @@ with gr.Blocks(title=TITLE) as demo:
                label="Text Input"
            )

-            analyze_checkbox = gr.Checkbox(
-                label="Analyze with MediaPipe",
-                value=False,
-                info="Apply MediaPipe hand tracking to visualize sign gestures"
-            )
-
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
@@ -455,34 +235,33 @@ with gr.Blocks(title=TITLE) as demo:
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
-            # Output video
-
-                label="Sign Language
                format="mp4",
-                autoplay=True
-
-            )

    # Examples in both languages
    gr.Examples(
        examples=[
-            ["Hello, how can I help you?"
-            ["Thank you for your patience."
-            ["Yes, please wait."
-            ["مرحبا"
-            ["شكرا"
-            ["نعم، من فضلك انتظر"
        ],
-        inputs=[text_input
-        outputs=[
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
-        inputs=[text_input
-        outputs=[
    )

    clear_btn.click(
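
The substance of this commit is the removal of the heavy dependencies: the MediaPipe landmark drawing, the moviepy concatenation, the signbsl.com downloads, and the online translator are all dropped in favor of a self-contained fallback path in the new file below. For instance, the old translate_arabic_to_english preferred a live translator and only fell back to the phrase dictionary, while the new one keeps just the dictionary pass. A minimal sketch of that old ordering, assuming the deep-translator package (whose GoogleTranslator class the removed code calls) and a two-entry excerpt of the TRANSLATIONS dict; the helper name translate_ar_to_en is only illustrative:

from deep_translator import GoogleTranslator

TRANSLATIONS = {"hello": "مرحبا", "thank you": "شكرا"}  # excerpt of the dict defined above

def translate_ar_to_en(text):
    """Prefer the online translator; fall back to simple phrase replacement."""
    try:
        # deep-translator's GoogleTranslator: translate Arabic to English
        return GoogleTranslator(source="ar", target="en").translate(text)
    except Exception:
        # Offline fallback: replace known Arabic phrases with their English keys
        result = text
        for en, ar in TRANSLATIONS.items():
            result = result.replace(ar, en)
        return result

print(translate_ar_to_en("شكرا"))  # online result when reachable, otherwise "thank you"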

New version (context plus added lines; additions are prefixed with "+"):

@@ -1,89 +1,76 @@
import os
import sys
+import json
import tempfile
import uuid
import requests
from pathlib import Path

+# Auto-install required packages
+try:
+    import gradio as gr
+    import numpy as np
+except ImportError:
+    print("Installing required packages...")
+    os.system(f"{sys.executable} -m pip install gradio numpy requests")
+    import gradio as gr
+    import numpy as np
+
+# Try to import OpenCV - if not available, try to install it
try:
+    import cv2
except ImportError:
+    print("Installing OpenCV...")
+    os.system(f"{sys.executable} -m pip install opencv-python-headless")
+    try:
+        import cv2
+    except ImportError:
+        print("Failed to install OpenCV. Will use simple image processing only.")
+        cv2 = None
+
+# Try to import moviepy (for video concatenation) - if not available, we'll use a simpler approach
+try:
+    from moviepy.editor import VideoFileClip, concatenate_videoclips
+    moviepy_available = True
+except ImportError:
+    print("MoviePy not available. Will use simpler video processing.")
+    moviepy_available = False

# Define the title and description
+TITLE = "Simple Sign Language Translator"
+DESCRIPTION = """This application translates English and Arabic text into sign language using simple video generation.
+It translates Arabic to English when needed, then maps the English text to sign language representations.

**Features:**
- Supports both English and Arabic input
+- Uses simple visual representations of signs
+- Automatic language detection
"""

# Define paths for sign language videos
VIDEO_ROOT = "sign_videos"
os.makedirs(VIDEO_ROOT, exist_ok=True)
os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)

# Define mapping of words to video files
SIGN_DICT = {
    "en": {
        "hello": f"{VIDEO_ROOT}/en/hello.mp4",
        "thank": f"{VIDEO_ROOT}/en/thank.mp4",
        "you": f"{VIDEO_ROOT}/en/you.mp4",
        "please": f"{VIDEO_ROOT}/en/please.mp4",
        "wait": f"{VIDEO_ROOT}/en/wait.mp4",
        "help": f"{VIDEO_ROOT}/en/help.mp4",
        "yes": f"{VIDEO_ROOT}/en/yes.mp4",
        "no": f"{VIDEO_ROOT}/en/no.mp4",
    }
}

# Create a dictionary for English to Arabic translations and vice versa
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "please": "من فضلك",
    "wait": "انتظر",
    "help": "مساعدة",
@@ -92,67 +79,9 @@ TRANSLATIONS = {
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "sorry": "آسف",
}

def detect_language(text):
    """Detect if the text is primarily English or Arabic"""
    if not text:
@@ -173,221 +102,76 @@ def detect_language(text):
    return "unknown"

def translate_arabic_to_english(text):
+    """Translate Arabic text to English using dictionary lookup"""
    if not text:
        return "", "No text to translate"

+    # Very basic translation - look up Arabic phrases in our dictionary
+    result = text
+    for en, ar in TRANSLATIONS.items():
+        result = result.replace(ar, en)

+    return result, f"Translated to English: {result}"

+def tokenize_text(text):
+    """Split the text into tokens"""
    # Convert to lowercase for English
+    text = text.lower()

    # Simple tokenization by splitting on spaces
+    return text.split()

+def create_simple_sign_video(text, output_path):
+    """Create a simple video with text representation of sign language"""
+    if cv2 is None:
+        # If OpenCV is not available, create a very simple text file
+        with open(output_path.replace('.mp4', '.txt'), 'w') as f:
+            f.write(f"Sign representation for: {text}")
+        return output_path.replace('.mp4', '.txt'), "Created text representation (OpenCV not available)"

+    # If OpenCV is available, create a simple video
+    height, width = 480, 640
+    fps = 30
+    seconds = 2

+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1.5
+    font_color = (255, 255, 255)
+    line_type = 2

+    # Text positioning
+    text_size = cv2.getTextSize(text, font, font_scale, line_type)[0]
+    text_x = (width - text_size[0]) // 2
+    text_y = (height + text_size[1]) // 2

+    for i in range(int(fps * seconds)):
+        # Create a gradient blue background
+        frame = np.zeros((height, width, 3), dtype=np.uint8)
+        for y in range(height):
+            blue_val = int(50 + (y / height) * 100)
+            frame[y, :] = [blue_val, 30, 20]  # BGR
+
+        # Make the text pulse slightly
+        pulse = 1.0 + 0.2 * np.sin(i * 0.2)
+        cv2.putText(frame, text, (text_x, text_y), font, font_scale * pulse, font_color, line_type)
+
+        # Add "SIGN LANGUAGE" text at bottom
+        cv2.putText(frame, "SIGN LANGUAGE", (width//2 - 100, height - 30),
+                    font, 0.7, (200, 200, 200), 1)
+
+        video.write(frame)

+    video.release()
+    return output_path, f"Created video representation for '{text}'"

+def translate_to_sign(text):
+    """Main function to translate text to sign language representation"""
    if not text:
        return None, ""

    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
@@ -397,28 +181,30 @@ def translate_to_sign(text, analyze_video=False):
    # If Arabic, translate to English first
    if language == "ar":
        english_text, translation_status = translate_arabic_to_english(text)
+        original_text = text
+        translation_info = f"Original Arabic: \"{original_text}\"\n{translation_status}\n"
    else:
+        english_text = text
        translation_info = ""

+    # Tokenize the text
+    tokens = tokenize_text(english_text)
    if not tokens:
        return None, translation_info + "No translatable tokens found."

+    # Create a temporary directory for the output
+    temp_dir = tempfile.gettempdir()
+    output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")

+    # Create a sign language video for the first token
+    # In a full implementation, you would create videos for all tokens and concatenate them
+    first_token = tokens[0] if tokens else "error"
+    video_path, video_status = create_simple_sign_video(first_token, output_path)

    # Prepare status message
    status = translation_info + video_status
+    if len(tokens) > 1:
+        status += f"\nNote: Only showing sign for first word. Full text: {english_text}"

    return video_path, status

@@ -441,12 +227,6 @@ with gr.Blocks(title=TITLE) as demo:
                label="Text Input"
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
@@ -455,34 +235,33 @@ with gr.Blocks(title=TITLE) as demo:
            status_output = gr.Textbox(label="Status", interactive=False)

        with gr.Column():
+            # Output (video or text, depending on what's available)
+            output_display = gr.Video(
+                label="Sign Language Output",
                format="mp4",
+                autoplay=True
+            ) if cv2 is not None else gr.Textbox(label="Sign Representation", lines=3)

    # Examples in both languages
    gr.Examples(
        examples=[
+            ["Hello, how can I help you?"],
+            ["Thank you for your patience."],
+            ["Yes, please wait."],
+            ["مرحبا"],
+            ["شكرا"],
+            ["نعم، من فضلك انتظر"],
        ],
+        inputs=[text_input],
+        outputs=[output_display, status_output],
        fn=translate_to_sign
    )

    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
+        inputs=[text_input],
+        outputs=[output_display, status_output]
    )

    clear_btn.click(
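
The diff stops at clear_btn.click(, so the rest of the event wiring is not shown. A minimal way to exercise the new pipeline outside Gradio, assuming the new file above is saved as app.py in the working directory (importing it runs its top-level setup, including the install fallbacks and the Blocks definition), and using only the functions defined in the diff:

# Smoke test for the new app.py; expected values follow the dictionaries defined above.
from app import detect_language, translate_arabic_to_english, tokenize_text, translate_to_sign

print(detect_language("شكرا"))                       # expected: "ar"
print(translate_arabic_to_english("شكرا"))           # expected: ("thank you", "Translated to English: thank you")
print(tokenize_text("Hello, how can I help you?"))   # lowercased, whitespace-split tokens (punctuation kept)

# Full pipeline: returns (path to an .mp4, or a .txt if OpenCV is unavailable, and a status string)
video_path, status = translate_to_sign("مرحبا")
print(video_path)
print(status)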