Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,20 +6,19 @@ import json
|
|
6 |
from datetime import datetime
|
7 |
import tempfile
|
8 |
import uuid
|
|
|
9 |
|
10 |
# Install required packages if not already installed
|
11 |
try:
|
12 |
import mediapipe as mp
|
13 |
import cv2
|
14 |
import numpy as np
|
15 |
-
from googletrans import Translator
|
16 |
except ImportError:
|
17 |
print("Installing required packages...")
|
18 |
-
os.system("pip install mediapipe opencv-python numpy
|
19 |
import mediapipe as mp
|
20 |
import cv2
|
21 |
import numpy as np
|
22 |
-
from googletrans import Translator
|
23 |
|
24 |
TITLE = "Multilingual Sign Language Customer Assistant"
|
25 |
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
|
@@ -27,18 +26,36 @@ The system automatically detects the input language and generates appropriate si
|
|
27 |
|
28 |
**Features:**
|
29 |
- Supports both English and Arabic text
|
30 |
-
- Uses
|
31 |
-
- Uses ArSL for Arabic sign language
|
32 |
- Perfect for customer service and assistance scenarios
|
33 |
"""
|
34 |
|
35 |
-
# Initialize
|
36 |
-
translator = Translator()
|
37 |
mp_hands = mp.solutions.hands
|
38 |
mp_drawing = mp.solutions.drawing_utils
|
39 |
mp_drawing_styles = mp.solutions.drawing_styles
|
40 |
mp_pose = mp.solutions.pose
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# SignDict - dictionary of common signs in both languages
|
43 |
# In a production app, these would link to pre-recorded videos or 3D animations
|
44 |
SIGN_DICT = {
|
@@ -109,6 +126,28 @@ def detect_language(text):
|
|
109 |
else:
|
110 |
return "unknown"
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
def tokenize_text(text, language):
|
113 |
"""Split text into tokens that can be matched to signs"""
|
114 |
if language == "ar":
|
@@ -150,18 +189,6 @@ def tokenize_text(text, language):
|
|
150 |
i += 1
|
151 |
return phrases
|
152 |
|
153 |
-
def translate_if_needed(text, source_lang, target_lang):
|
154 |
-
"""Translate text if it's not already in the target language"""
|
155 |
-
if source_lang == target_lang:
|
156 |
-
return text
|
157 |
-
|
158 |
-
try:
|
159 |
-
translation = translator.translate(text, src=source_lang, dest=target_lang)
|
160 |
-
return translation.text
|
161 |
-
except Exception as e:
|
162 |
-
print(f"Translation error: {str(e)}")
|
163 |
-
return text
|
164 |
-
|
165 |
def generate_default_sign_video(text, output_path, language="en"):
|
166 |
"""Generate a simple video with the text when no sign is available"""
|
167 |
# Create a black frame with text
|
@@ -193,11 +220,8 @@ def generate_default_sign_video(text, output_path, language="en"):
|
|
193 |
video.release()
|
194 |
return output_path
|
195 |
|
196 |
-
def create_avatar_animation(text, output_path, language="en"):
|
197 |
"""Create a 3D avatar animation for the sign (simplified version)"""
|
198 |
-
# In a real implementation, this would use a 3D avatar system
|
199 |
-
# Here we'll just simulate it with a basic animation
|
200 |
-
|
201 |
width, height = 640, 480
|
202 |
fps = 30
|
203 |
duration = 3 # seconds
|
@@ -209,32 +233,72 @@ def create_avatar_animation(text, output_path, language="en"):
|
|
209 |
# Create a simple animation with hands
|
210 |
frames = fps * duration
|
211 |
for i in range(frames):
|
212 |
-
# Create a
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
|
227 |
|
228 |
-
# Draw
|
229 |
-
|
230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
# Add text with current sign
|
233 |
font = cv2.FONT_HERSHEY_SIMPLEX
|
234 |
-
cv2.putText(frame, text, (width//2-100, height-50), font, 1, (
|
235 |
if language == "ar":
|
236 |
# Right-to-left indicator
|
237 |
-
cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (
|
238 |
|
239 |
video.write(frame)
|
240 |
|
@@ -249,13 +313,16 @@ def generate_sign_video(tokens, language, output_format="3D"):
|
|
249 |
|
250 |
# In a real implementation, this would concatenate actual sign videos
|
251 |
# For this demo, we'll create a simple animation
|
252 |
-
if language in SIGN_DICT and tokens[0] in SIGN_DICT[language]:
|
253 |
# In a real implementation, this would load the video file
|
254 |
# For demo purposes, we'll create an animation
|
255 |
-
create_avatar_animation(tokens[0], output_path, language)
|
256 |
else:
|
257 |
# Generate a default video with text
|
258 |
-
|
|
|
|
|
|
|
259 |
|
260 |
return output_path
|
261 |
|
@@ -330,16 +397,16 @@ with gr.Blocks(title=TITLE) as demo:
|
|
330 |
# Examples in both languages
|
331 |
gr.Examples(
|
332 |
examples=[
|
333 |
-
["Hello, how can I help you today?"],
|
334 |
-
["Please wait while I check your account."],
|
335 |
-
["Thank you for your patience."],
|
336 |
-
["
|
337 |
-
["من فضلك انتظر بينما أتحقق من حسابك."],
|
338 |
-
["شكرا لصبرك."]
|
339 |
],
|
340 |
-
inputs=[text_input],
|
341 |
outputs=[video_output, status_output],
|
342 |
-
fn=
|
343 |
)
|
344 |
|
345 |
# Event handlers
|
@@ -357,4 +424,4 @@ with gr.Blocks(title=TITLE) as demo:
|
|
357 |
|
358 |
# Launch the app
|
359 |
if __name__ == "__main__":
|
360 |
-
demo.launch()
|
|
|
6 |
from datetime import datetime
|
7 |
import tempfile
|
8 |
import uuid
|
9 |
+
import re
|
10 |
|
11 |
# Install required packages if not already installed
|
12 |
try:
|
13 |
import mediapipe as mp
|
14 |
import cv2
|
15 |
import numpy as np
|
|
|
16 |
except ImportError:
|
17 |
print("Installing required packages...")
|
18 |
+
os.system("pip install mediapipe opencv-python numpy --quiet")
|
19 |
import mediapipe as mp
|
20 |
import cv2
|
21 |
import numpy as np
|
|
|
22 |
|
23 |
TITLE = "Multilingual Sign Language Customer Assistant"
|
24 |
DESCRIPTION = """This app translates English or Arabic text into sign language videos for customer assistance.
|
|
|
26 |
|
27 |
**Features:**
|
28 |
- Supports both English and Arabic text
|
29 |
+
- Uses 3D avatar technology to generate sign language
|
|
|
30 |
- Perfect for customer service and assistance scenarios
|
31 |
"""
|
32 |
|
33 |
+
# Initialize MediaPipe
|
|
|
34 |
mp_hands = mp.solutions.hands
|
35 |
mp_drawing = mp.solutions.drawing_utils
|
36 |
mp_drawing_styles = mp.solutions.drawing_styles
|
37 |
mp_pose = mp.solutions.pose
|
38 |
|
39 |
+
# Dictionary of translations for common customer service phrases
|
40 |
+
TRANSLATIONS = {
|
41 |
+
"hello": "مرحبا",
|
42 |
+
"welcome": "أهلا وسهلا",
|
43 |
+
"thank you": "شكرا",
|
44 |
+
"help": "مساعدة",
|
45 |
+
"yes": "نعم",
|
46 |
+
"no": "لا",
|
47 |
+
"please": "من فضلك",
|
48 |
+
"wait": "انتظر",
|
49 |
+
"sorry": "آسف",
|
50 |
+
"how can i help you": "كيف يمكنني مساعدتك",
|
51 |
+
"customer": "عميل",
|
52 |
+
"service": "خدمة",
|
53 |
+
"support": "دعم",
|
54 |
+
"information": "معلومات",
|
55 |
+
"question": "سؤال",
|
56 |
+
"answer": "إجابة",
|
57 |
+
}
|
58 |
+
|
59 |
# SignDict - dictionary of common signs in both languages
|
60 |
# In a production app, these would link to pre-recorded videos or 3D animations
|
61 |
SIGN_DICT = {
|
|
|
126 |
else:
|
127 |
return "unknown"
|
128 |
|
129 |
+
def translate_text(text, source_lang, target_lang, translations=None):
    """Translate known customer-service phrases via a dictionary lookup.

    Only phrases present in the phrase table are translated; everything
    else in ``text`` is passed through (lowercased, because matching is
    done on the lowercased text).

    Args:
        text: The text to translate.
        source_lang: Language code of ``text``; "en" and "ar" are handled.
        target_lang: Language code to translate into; "en" and "ar" are
            handled.
        translations: Optional mapping of English phrase -> Arabic phrase.
            Defaults to the module-level ``TRANSLATIONS`` table.

    Returns:
        ``text`` unchanged when the languages are equal or when the
        language pair is not en<->ar; otherwise the lowercased text with
        every known phrase replaced by its translation.
    """
    if source_lang == target_lang:
        return text

    if translations is None:
        translations = TRANSLATIONS

    if source_lang == "en" and target_lang == "ar":
        pairs = [(eng, ar) for eng, ar in translations.items()]
    elif source_lang == "ar" and target_lang == "en":
        pairs = [(ar, eng) for eng, ar in translations.items()]
    else:
        return text  # Return original if no translation path

    # Try longer phrases first so "how can i help you" wins over the
    # shorter "help" entry contained inside it.
    pairs.sort(key=lambda pair: len(pair[0]), reverse=True)

    # Keys are lowercased because matching runs on the lowercased text.
    lookup = {}
    for source_phrase, target_phrase in pairs:
        if source_phrase:
            lookup.setdefault(source_phrase.lower(), target_phrase)

    text_lower = text.lower()
    if not lookup:
        return text_lower

    # Match whole words/phrases only: a plain substring replace would
    # rewrite "no" inside "know" or "لا" inside "أهلا".
    # NOTE: a phrase directly attached to another word character (for
    # example an Arabic word with a prefixed clitic) is therefore not
    # matched.
    alternatives = "|".join(re.escape(phrase) for phrase in lookup)
    pattern = re.compile(r"(?<!\w)(?:" + alternatives + r")(?!\w)")

    # A single pass guarantees translated output is never re-scanned.
    return pattern.sub(lambda match: lookup[match.group(0)], text_lower)
|
150 |
+
|
151 |
def tokenize_text(text, language):
|
152 |
"""Split text into tokens that can be matched to signs"""
|
153 |
if language == "ar":
|
|
|
189 |
i += 1
|
190 |
return phrases
|
191 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
192 |
def generate_default_sign_video(text, output_path, language="en"):
|
193 |
"""Generate a simple video with the text when no sign is available"""
|
194 |
# Create a black frame with text
|
|
|
220 |
video.release()
|
221 |
return output_path
|
222 |
|
223 |
+
def create_avatar_animation(text, output_path, language="en", style="3D"):
|
224 |
"""Create a 3D avatar animation for the sign (simplified version)"""
|
|
|
|
|
|
|
225 |
width, height = 640, 480
|
226 |
fps = 30
|
227 |
duration = 3 # seconds
|
|
|
233 |
# Create a simple animation with hands
|
234 |
frames = fps * duration
|
235 |
for i in range(frames):
|
236 |
+
# Create a background based on style
|
237 |
+
if style == "3D":
|
238 |
+
# Create a gradient background
|
239 |
+
frame = np.zeros((height, width, 3), dtype=np.uint8)
|
240 |
+
for y in range(height):
|
241 |
+
for x in range(width):
|
242 |
+
frame[y, x] = [
|
243 |
+
int(100 + 50 * (x / width)),
|
244 |
+
int(60 + 30 * (y / height)),
|
245 |
+
int(120 + 40 * ((x+y) / (width+height)))
|
246 |
+
]
|
247 |
+
else:
|
248 |
+
# Simple solid background for 2D
|
249 |
+
frame = np.ones((height, width, 3), dtype=np.uint8) * np.array([240, 240, 240], dtype=np.uint8)
|
|
|
250 |
|
251 |
+
# Draw a simple avatar
|
252 |
+
if style == "3D":
|
253 |
+
# 3D-style avatar
|
254 |
+
# Body
|
255 |
+
cv2.rectangle(frame, (width//2-50, height//2-100), (width//2+50, height//2+100), (200, 200, 200), -1)
|
256 |
+
# Head
|
257 |
+
cv2.circle(frame, (width//2, height//2-150), 50, (200, 200, 200), -1)
|
258 |
+
|
259 |
+
# Animate hands based on frame number
|
260 |
+
t = i / frames
|
261 |
+
# Left hand movement
|
262 |
+
x1 = int(width//2 - 100 - 50 * np.sin(t * 2 * np.pi))
|
263 |
+
y1 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
|
264 |
+
# Right hand movement
|
265 |
+
x2 = int(width//2 + 100 + 50 * np.sin(t * 2 * np.pi))
|
266 |
+
y2 = int(height//2 - 50 * np.cos(t * 2 * np.pi))
|
267 |
+
|
268 |
+
# Draw hands
|
269 |
+
cv2.circle(frame, (x1, y1), 20, (200, 200, 200), -1)
|
270 |
+
cv2.circle(frame, (x2, y2), 20, (200, 200, 200), -1)
|
271 |
+
else:
|
272 |
+
# 2D-style signing
|
273 |
+
# Drawing a simplified 2D signer
|
274 |
+
cv2.line(frame, (width//2, height//2-100), (width//2, height//2+50), (0, 0, 0), 3) # Body
|
275 |
+
cv2.circle(frame, (width//2, height//2-120), 20, (0, 0, 0), 2) # Head
|
276 |
+
|
277 |
+
# Animated hands for signing
|
278 |
+
t = i / frames
|
279 |
+
angle1 = t * 2 * np.pi
|
280 |
+
angle2 = t * 2 * np.pi + np.pi/2
|
281 |
+
|
282 |
+
# Left arm
|
283 |
+
x1 = int(width//2)
|
284 |
+
y1 = int(height//2 - 70)
|
285 |
+
x2 = int(x1 - 60 * np.cos(angle1))
|
286 |
+
y2 = int(y1 + 60 * np.sin(angle1))
|
287 |
+
cv2.line(frame, (x1, y1), (x2, y2), (0, 0, 0), 2)
|
288 |
+
|
289 |
+
# Right arm
|
290 |
+
x3 = int(width//2)
|
291 |
+
y3 = int(height//2 - 70)
|
292 |
+
x4 = int(x3 + 60 * np.cos(angle2))
|
293 |
+
y4 = int(y3 + 60 * np.sin(angle2))
|
294 |
+
cv2.line(frame, (x3, y3), (x4, y4), (0, 0, 0), 2)
|
295 |
|
296 |
# Add text with current sign
|
297 |
font = cv2.FONT_HERSHEY_SIMPLEX
|
298 |
+
cv2.putText(frame, text, (width//2-100, height-50), font, 1, (0, 0, 0), 2)
|
299 |
if language == "ar":
|
300 |
# Right-to-left indicator
|
301 |
+
cv2.putText(frame, "RTL", (width-70, 30), font, 0.7, (0, 0, 0), 1)
|
302 |
|
303 |
video.write(frame)
|
304 |
|
|
|
313 |
|
314 |
# In a real implementation, this would concatenate actual sign videos
|
315 |
# For this demo, we'll create a simple animation
|
316 |
+
if language in SIGN_DICT and tokens and tokens[0] in SIGN_DICT[language]:
|
317 |
# In a real implementation, this would load the video file
|
318 |
# For demo purposes, we'll create an animation
|
319 |
+
create_avatar_animation(tokens[0], output_path, language, output_format)
|
320 |
else:
|
321 |
# Generate a default video with text
|
322 |
+
if tokens:
|
323 |
+
create_avatar_animation(tokens[0], output_path, language, output_format)
|
324 |
+
else:
|
325 |
+
create_avatar_animation("No tokens", output_path, language, output_format)
|
326 |
|
327 |
return output_path
|
328 |
|
|
|
397 |
# Examples in both languages
|
398 |
gr.Examples(
|
399 |
examples=[
|
400 |
+
["Hello, how can I help you today?", "3D"],
|
401 |
+
["Please wait while I check your account.", "3D"],
|
402 |
+
["Thank you for your patience.", "3D"],
|
403 |
+
["مرحبا، كيف يمكنني مساعدتك اليوم؟", "3D"],
|
404 |
+
["من فضلك انتظر بينما أتحقق من حسابك.", "3D"],
|
405 |
+
["شكرا لصبرك.", "3D"]
|
406 |
],
|
407 |
+
inputs=[text_input, format_dropdown],
|
408 |
outputs=[video_output, status_output],
|
409 |
+
fn=translate_to_sign
|
410 |
)
|
411 |
|
412 |
# Event handlers
|
|
|
424 |
|
425 |
# Launch the app
|
426 |
if __name__ == "__main__":
|
427 |
+
demo.launch()
|