import os
import sys
import json
import tempfile
import uuid
import requests
from pathlib import Path
# Auto-install required packages
try:
    import gradio as gr
    import numpy as np
except ImportError:
    print("Installing required packages...")
    os.system(f"{sys.executable} -m pip install gradio numpy requests")
    import gradio as gr
    import numpy as np
# Try to import OpenCV - if not available, try to install it
try:
    import cv2
except ImportError:
    print("Installing OpenCV...")
    os.system(f"{sys.executable} -m pip install opencv-python-headless")
    try:
        import cv2
    except ImportError:
        print("Failed to install OpenCV. Will use simple image processing only.")
        cv2 = None
# Try to import moviepy (for video concatenation) - if not available, we'll use a simpler approach
try:
    from moviepy.editor import VideoFileClip, concatenate_videoclips
    moviepy_available = True
except ImportError:
    print("MoviePy not available. Will use simpler video processing.")
    moviepy_available = False
# Define the title and description
TITLE = "Simple Sign Language Translator"
DESCRIPTION = """This application translates English and Arabic text into sign language using simple video generation.
It translates Arabic to English when needed, then maps the English text to sign language representations.
**Features:**
- Supports both English and Arabic input
- Uses simple visual representations of signs
- Automatic language detection
"""
# Define paths for sign language videos
VIDEO_ROOT = "sign_videos"
os.makedirs(VIDEO_ROOT, exist_ok=True)
os.makedirs(f"{VIDEO_ROOT}/en", exist_ok=True)
# Define mapping of words to video files
SIGN_DICT = {
    "en": {
        "hello": f"{VIDEO_ROOT}/en/hello.mp4",
        "thank": f"{VIDEO_ROOT}/en/thank.mp4",
        "you": f"{VIDEO_ROOT}/en/you.mp4",
        "please": f"{VIDEO_ROOT}/en/please.mp4",
        "wait": f"{VIDEO_ROOT}/en/wait.mp4",
        "help": f"{VIDEO_ROOT}/en/help.mp4",
        "yes": f"{VIDEO_ROOT}/en/yes.mp4",
        "no": f"{VIDEO_ROOT}/en/no.mp4",
    }
}
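# Illustrative sketch, not wired into the app: SIGN_DICT is intended as a lookup
# from tokens to pre-recorded clips, with the generated video from
# create_simple_sign_video (defined below) as the fallback. The helper name
# get_sign_video_path is an assumption, not part of the original code.
def get_sign_video_path(token, language="en"):
    """Return the path of a pre-recorded sign clip for a token, or None."""
    path = SIGN_DICT.get(language, {}).get(token)
    return path if path and os.path.exists(path) else None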
# English-to-Arabic phrase pairs, used below for Arabic-to-English lookup
TRANSLATIONS = {
    "hello": "مرحبا",
    "welcome": "أهلا وسهلا",
    "thank you": "شكرا",
    "please": "من فضلك",
    "wait": "انتظر",
    "help": "مساعدة",
    "yes": "نعم",
    "no": "لا",
    "how can i help you": "كيف يمكنني مساعدتك",
    "customer": "عميل",
    "service": "خدمة",
    "sorry": "آسف",
}
def detect_language(text):
    """Detect if the text is primarily English or Arabic"""
    if not text:
        return "unknown"
    # Simple detection by character set
    arabic_chars = set('ءآأؤإئابةتثجحخدذرزسشصضطظعغفقكلمنهوي')
    english_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    arabic_count = sum(1 for char in text if char in arabic_chars)
    english_count = sum(1 for char in text if char in english_chars)
    if arabic_count > english_count:
        return "ar"
    elif english_count > 0:
        return "en"
    else:
        return "unknown"
def translate_arabic_to_english(text):
    """Translate Arabic text to English using dictionary lookup"""
    if not text:
        return "", "No text to translate"
    # Very basic translation: replace known Arabic phrases, longest first,
    # so multi-word phrases are matched before their constituent words
    result = text
    for en, ar in sorted(TRANSLATIONS.items(), key=lambda item: len(item[1]), reverse=True):
        result = result.replace(ar, en)
    return result, f"Translated to English: {result}"
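# Example: translate_arabic_to_english("نعم، من فضلك انتظر")
# returns ("yes، please wait", "Translated to English: yes، please wait");
# the Arabic comma survives and is stripped later by tokenize_text.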
def tokenize_text(text):
    """Split the text into lowercase tokens"""
    # Lowercase, split on whitespace, and strip surrounding punctuation
    # (including Arabic punctuation) so "hello," still matches "hello"
    tokens = [t.strip(".,!?;:\"'()،؟؛") for t in text.lower().split()]
    return [t for t in tokens if t]
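# Example: tokenize_text("Hello, how can I help you?")
# -> ["hello", "how", "can", "i", "help", "you"]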
def create_simple_sign_video(text, output_path):
    """Create a simple video with text representation of sign language"""
    if cv2 is None:
        # If OpenCV is not available, fall back to a plain text file
        txt_path = output_path.replace('.mp4', '.txt')
        with open(txt_path, 'w') as f:
            f.write(f"Sign representation for: {text}")
        return txt_path, "Created text representation (OpenCV not available)"
    # If OpenCV is available, render a short clip with the word displayed
    height, width = 480, 640
    fps = 30
    seconds = 2
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.5
    font_color = (255, 255, 255)
    thickness = 2
    # Text positioning (centered for the base font scale)
    text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
    text_x = (width - text_size[0]) // 2
    text_y = (height + text_size[1]) // 2
    for i in range(int(fps * seconds)):
        # Create a gradient blue background
        frame = np.zeros((height, width, 3), dtype=np.uint8)
        for y in range(height):
            blue_val = int(50 + (y / height) * 100)
            frame[y, :] = [blue_val, 30, 20]  # BGR
        # Make the text pulse slightly
        pulse = 1.0 + 0.2 * np.sin(i * 0.2)
        cv2.putText(frame, text, (text_x, text_y), font, font_scale * pulse, font_color, thickness)
        # Add a "SIGN LANGUAGE" caption at the bottom
        cv2.putText(frame, "SIGN LANGUAGE", (width // 2 - 100, height - 30),
                    font, 0.7, (200, 200, 200), 1)
        video.write(frame)
    video.release()
    return output_path, f"Created video representation for '{text}'"
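# Example (assuming OpenCV is available):
#   create_simple_sign_video("hello", "/tmp/hello.mp4")
#   -> ("/tmp/hello.mp4", "Created video representation for 'hello'")
# writes a 2-second, 30 fps, 640x480 clip with the word pulsing on screen.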
def translate_to_sign(text):
    """Main function to translate text to sign language representation"""
    if not text:
        return None, ""
    # Detect the input language
    language = detect_language(text)
    if language == "unknown":
        return None, "Could not determine the language. Please use English or Arabic."
    try:
        # If Arabic, translate to English first
        if language == "ar":
            english_text, translation_status = translate_arabic_to_english(text)
            translation_info = f"Original Arabic: \"{text}\"\n{translation_status}\n"
        else:
            english_text = text
            translation_info = ""
        # Tokenize the text
        tokens = tokenize_text(english_text)
        if not tokens:
            return None, translation_info + "No translatable tokens found."
        # Create a temporary path for the output
        temp_dir = tempfile.gettempdir()
        output_path = os.path.join(temp_dir, f"sign_output_{uuid.uuid4()}.mp4")
        # Create a sign language video for the first token only.
        # A full implementation would render a clip per token and concatenate
        # them; see the concatenate_sign_videos sketch below.
        first_token = tokens[0]
        video_path, video_status = create_simple_sign_video(first_token, output_path)
        # Prepare status message
        status = translation_info + video_status
        if len(tokens) > 1:
            status += f"\nNote: Only showing sign for first word. Full text: {english_text}"
        return video_path, status
    except Exception as e:
        error_msg = str(e)
        print(f"Error during translation: {error_msg}")
        return None, f"Error during translation: {error_msg}"
# Create the Gradio interface
with gr.Blocks(title=TITLE) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        with gr.Column():
            # Input area
            text_input = gr.Textbox(
                lines=4,
                placeholder="Enter English or Arabic text here...",
                label="Text Input"
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                translate_btn = gr.Button("Translate to Sign Language", variant="primary")
            # Status area
            status_output = gr.Textbox(label="Status", interactive=False)
        with gr.Column():
            # Output (video or text, depending on what's available)
            output_display = gr.Video(
                label="Sign Language Output",
                format="mp4",
                autoplay=True
            ) if cv2 is not None else gr.Textbox(label="Sign Representation", lines=3)
    # Examples in both languages
    gr.Examples(
        examples=[
            ["Hello, how can I help you?"],
            ["Thank you for your patience."],
            ["Yes, please wait."],
            ["مرحبا"],
            ["شكرا"],
            ["نعم، من فضلك انتظر"],
        ],
        inputs=[text_input],
        outputs=[output_display, status_output],
        fn=translate_to_sign
    )
    # Event handlers
    translate_btn.click(
        fn=translate_to_sign,
        inputs=[text_input],
        outputs=[output_display, status_output]
    )
    clear_btn.click(
        fn=lambda: ("", "Input cleared"),
        inputs=None,
        outputs=[text_input, status_output]
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()