iSushant committed
Commit 041500b · verified · 1 Parent(s): 20b0319

Create app.py

Files changed (1): app.py (+76, -0)
app.py ADDED
@@ -0,0 +1,76 @@
import gradio as gr
import google.generativeai as genai
from PIL import Image
from gtts import gTTS
import tempfile

# Configure Gemini API key (hardcoded here; a Space secret or environment
# variable would be a safer place for it)
genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")

def get_gemini_context():
    return """
    Focus on identifying and interpreting sign language gestures:
    1. Look for hand shapes and finger positions
    2. Identify any American Sign Language (ASL) letters or numbers
    3. Recognize common ASL gestures and signs
    4. Provide clear, direct interpretation
    5. If unsure, describe the hand position/gesture seen
    Keep responses brief and focused on the sign's meaning.
    """

def get_gemini_response_text(response):
    # No usable text if the prompt was blocked by safety filters.
    if response.prompt_feedback and response.prompt_feedback.block_reason:
        return None

    # Prefer assembling the text parts of the first candidate that has any.
    if response.candidates:
        for candidate in response.candidates:
            if candidate.content and candidate.content.parts:
                full_text = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
                if full_text.strip():
                    return full_text

    # Fall back to the convenience .text accessor when available.
    if hasattr(response, 'text') and response.text and response.text.strip():
        return response.text

    return None

def interpret_sign(image):
    try:
        if image is None:
            return "No image captured.", None

        # Ask Gemini to interpret the captured sign
        model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
        prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
        response = model.generate_content([prompt, image])

        text_response = get_gemini_response_text(response)
        if not text_response:
            return "Could not interpret the sign.", None

        # Generate TTS audio and write it to a temporary MP3 file;
        # gr.Audio expects a filepath (or a (sample_rate, data) tuple),
        # not an in-memory file object.
        tts = gTTS(text=text_response, lang='en', slow=False)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tts.write_to_fp(tmp)
            audio_path = tmp.name

        return text_response, audio_path

    except Exception as e:
        return f"Error: {str(e)}", None

# Gradio Interface
demo = gr.Interface(
    fn=interpret_sign,
    # Gradio 4+ uses `sources=[...]`; Gradio 3 used `source="webcam"`.
    inputs=gr.Image(sources=["webcam"], type="pil", label="Capture Sign Image"),
    outputs=[
        gr.Textbox(label="Interpretation"),
        gr.Audio(label="Audio Interpretation")
    ],
    title="Sign Language Interpreter",
    description="Capture an image using your webcam. The app interprets the sign and reads it aloud."
)

if __name__ == "__main__":
    demo.launch()
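A minimal local smoke test of the handler above, as a sketch only: it assumes the file is saved as app.py with a valid Gemini key, that gradio, google-generativeai, Pillow, and gTTS are installed, and that sample_sign.jpg is a hypothetical test image on disk.

# Hypothetical local check of interpret_sign, bypassing the Gradio UI.
from PIL import Image

from app import interpret_sign

image = Image.open("sample_sign.jpg")   # hypothetical webcam-style still image
text, audio_path = interpret_sign(image)

print("Interpretation:", text)          # model text, or an error message
print("TTS audio file:", audio_path)    # path to a temporary .mp3, or None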