iSushant committed
Commit 9963dcc · verified · 1 Parent(s): d619bf0

Update app.py

Files changed (1)
  1. app.py +27 -28
app.py CHANGED
@@ -1,10 +1,11 @@
 import gradio as gr
 import google.generativeai as genai
 from PIL import Image
-from gtts import gTTS
 import io
+from gtts import gTTS
+import base64
 
-# Gemini API key
+# Configure Gemini API
 genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")
 
 def get_gemini_context():
@@ -16,58 +17,56 @@ def get_gemini_context():
     4. Provide clear, direct interpretation
     5. If unsure, describe the hand position/gesture seen
     Keep responses brief and focused on the sign's meaning.
-    """
+    """
 
 def get_gemini_response_text(response):
-    if response.prompt_feedback and response.prompt_feedback.block_reason:
+    if hasattr(response, 'prompt_feedback') and response.prompt_feedback and getattr(response.prompt_feedback, 'block_reason', None):
         return None
-
-    if response.candidates:
+    if hasattr(response, 'candidates'):
         for candidate in response.candidates:
-            if candidate.content and candidate.content.parts:
+            if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts'):
                 full_text = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
                 if full_text.strip():
                     return full_text
-
     if hasattr(response, 'text') and response.text and response.text.strip():
         return response.text
-
     return None
 
-def interpret_sign(image):
+def interpret_sign(image: Image.Image):
     try:
-        if image is None:
-            return "No image captured.", None
-
+        # Get Gemini interpretation
         model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
         prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
         response = model.generate_content([prompt, image])
-
         text_response = get_gemini_response_text(response)
         if not text_response:
             return "Could not interpret the sign.", None
 
+        # Generate audio
        tts = gTTS(text=text_response, lang='en', slow=False)
         audio_fp = io.BytesIO()
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
-
-        return text_response, (audio_fp, "speech.mp3")
-
+        audio_bytes = audio_fp.read()
+        return text_response, (audio_bytes, "audio/mp3")
     except Exception as e:
         return f"Error: {str(e)}", None
 
-# Gradio Interface
-demo = gr.Interface(
-    fn=interpret_sign,
-    inputs=gr.Image(source="webcam", type="pil"),
-    outputs=[
-        gr.Textbox(label="Interpretation"),
-        gr.Audio(label="Audio Interpretation")
-    ],
-    title="Sign Language Interpreter",
-    description="Capture an image using your webcam. The app interprets the sign and reads it aloud."
-)
+with gr.Blocks(title="Sign Language Interpreter") as demo:
+    gr.Markdown("# Sign Language Interpreter\nUpload or capture an image of a sign language gesture to get an interpretation and audio.")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(label="Sign Image", sources=["upload", "webcam"], type="pil")
+            submit_btn = gr.Button("Interpret Sign")
+        with gr.Column():
+            text_output = gr.Textbox(label="Interpretation")
+            audio_output = gr.Audio(label="Audio", type="filepath")
+
+    submit_btn.click(
+        interpret_sign,
+        inputs=image_input,
+        outputs=[text_output, audio_output]
+    )
 
 if __name__ == "__main__":
     demo.launch()
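
A note on the configuration line above: the commit keeps a Gemini API key hardcoded in app.py, so the key now lives in the repository history and should be treated as leaked and rotated. A minimal sketch of the usual alternative, assuming the key is supplied through an environment variable (the GOOGLE_API_KEY name is an illustrative choice, not something this commit defines):

    import os

    import google.generativeai as genai

    # Hypothetical env var name; use whatever the deployment exports.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("GOOGLE_API_KEY is not set")
    genai.configure(api_key=api_key)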
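
The hardened get_gemini_response_text can be exercised without calling the API at all. A small sketch using types.SimpleNamespace to fake the attribute layout the function probes for (this mimics only the shape the function expects, not the SDK's real response classes):

    from types import SimpleNamespace

    fake_part = SimpleNamespace(text="Thumbs up: a common sign of approval.")
    fake_response = SimpleNamespace(
        prompt_feedback=None,  # falsy, so the block-reason check is skipped
        candidates=[SimpleNamespace(content=SimpleNamespace(parts=[fake_part]))],
    )

    print(get_gemini_response_text(fake_response))
    # -> Thumbs up: a common sign of approval.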
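
One mismatch in the new wiring is worth flagging: interpret_sign now returns (audio_bytes, "audio/mp3"), but the output component is declared as gr.Audio(type="filepath"), whose accepted return values are a path to an audio file or a (sample_rate, numpy_array) tuple, not raw bytes plus a MIME string. (The newly added import base64 is also unused.) A minimal reconciliation sketch, assuming it is acceptable to write the gTTS output to a temporary .mp3 file; the text_to_speech_file helper is hypothetical:

    import tempfile

    from gtts import gTTS

    def text_to_speech_file(text: str) -> str:
        """Render text to a temporary MP3 file and return its path."""
        tts = gTTS(text=text, lang='en', slow=False)
        # delete=False keeps the file on disk so Gradio can serve it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
            tts.write_to_fp(fp)
            return fp.name

With that helper, interpret_sign would end with return text_response, text_to_speech_file(text_response), matching the declared output type.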