iSushant committed
Commit 041500b · verified · 1 Parent(s): 20b0319

Create app.py

Files changed (1): app.py (+76, -0)
app.py ADDED
@@ -0,0 +1,76 @@
import gradio as gr
import google.generativeai as genai
from PIL import Image
from gtts import gTTS
import tempfile

# Configure Gemini API key (hardcoded here; a Space secret or environment
# variable would be a safer place for it)
genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")

def get_gemini_context():
    return """
    Focus on identifying and interpreting sign language gestures:
    1. Look for hand shapes and finger positions
    2. Identify any American Sign Language (ASL) letters or numbers
    3. Recognize common ASL gestures and signs
    4. Provide clear, direct interpretation
    5. If unsure, describe the hand position/gesture seen
    Keep responses brief and focused on the sign's meaning.
    """

def get_gemini_response_text(response):
    # No usable text if the prompt was blocked by safety filters.
    if response.prompt_feedback and response.prompt_feedback.block_reason:
        return None

    # Prefer assembling the text parts of the first candidate that has any.
    if response.candidates:
        for candidate in response.candidates:
            if candidate.content and candidate.content.parts:
                full_text = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
                if full_text.strip():
                    return full_text

    # Fall back to the convenience .text accessor when available.
    if hasattr(response, 'text') and response.text and response.text.strip():
        return response.text

    return None

def interpret_sign(image):
    try:
        if image is None:
            return "No image captured.", None

        # Ask Gemini to interpret the captured sign
        model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
        prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
        response = model.generate_content([prompt, image])

        text_response = get_gemini_response_text(response)
        if not text_response:
            return "Could not interpret the sign.", None

        # Generate TTS audio and write it to a temporary MP3 file;
        # gr.Audio expects a filepath (or a (sample_rate, data) tuple),
        # not an in-memory file object.
        tts = gTTS(text=text_response, lang='en', slow=False)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tts.write_to_fp(tmp)
            audio_path = tmp.name

        return text_response, audio_path

    except Exception as e:
        return f"Error: {str(e)}", None

# Gradio Interface
demo = gr.Interface(
    fn=interpret_sign,
    # Gradio 4+ uses `sources=[...]`; Gradio 3 used `source="webcam"`.
    inputs=gr.Image(sources=["webcam"], type="pil", label="Capture Sign Image"),
    outputs=[
        gr.Textbox(label="Interpretation"),
        gr.Audio(label="Audio Interpretation")
    ],
    title="Sign Language Interpreter",
    description="Capture an image using your webcam. The app interprets the sign and reads it aloud."
)

if __name__ == "__main__":
    demo.launch()
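A minimal local smoke test of the handler above, as a sketch only: it assumes the file is saved as app.py with a valid Gemini key, that gradio, google-generativeai, Pillow, and gTTS are installed, and that sample_sign.jpg is a hypothetical test image on disk.

# Hypothetical local check of interpret_sign, bypassing the Gradio UI.
from PIL import Image

from app import interpret_sign

image = Image.open("sample_sign.jpg")   # hypothetical webcam-style still image
text, audio_path = interpret_sign(image)

print("Interpretation:", text)          # model text, or an error message
print("TTS audio file:", audio_path)    # path to a temporary .mp3, or None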