Update app.py
app.py CHANGED

@@ -4,7 +4,7 @@ from PIL import Image
 from gtts import gTTS
 import io
 
-#
+# Gemini API key
 genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")
 
 def get_gemini_context():
@@ -39,7 +39,6 @@ def interpret_sign(image):
     if image is None:
         return "No image captured.", None
 
-    # Gemini model
     model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
     prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
     response = model.generate_content([prompt, image])
@@ -48,7 +47,6 @@ def interpret_sign(image):
     if not text_response:
         return "Could not interpret the sign.", None
 
-    # Generate TTS audio
     tts = gTTS(text=text_response, lang='en', slow=False)
     audio_fp = io.BytesIO()
     tts.write_to_fp(audio_fp)
@@ -62,7 +60,7 @@ def interpret_sign(image):
 # Gradio Interface
 demo = gr.Interface(
     fn=interpret_sign,
-    inputs=gr.
+    inputs=gr.Image(source="webcam", type="pil"),
     outputs=[
         gr.Textbox(label="Interpretation"),
         gr.Audio(label="Audio Interpretation")
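For orientation, below is a minimal sketch of how the pieces touched by this commit fit together end to end. It is not the committed file: the body of get_gemini_context() and the function's return statement are not visible in the diff, so the stub context string, the environment-variable key lookup (GOOGLE_API_KEY), and the temp-file handoff to gr.Audio are assumptions. Note that source="webcam" is the Gradio 3.x keyword; Gradio 4.x renamed it to sources=["webcam"].

import os
import tempfile

import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from PIL import Image

# Assumption: key read from the environment instead of being hardcoded.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

def get_gemini_context():
    # Placeholder: the real body is not shown in the diff.
    return "You are an assistant that interprets sign language and hand gestures."

def interpret_sign(image: Image.Image):
    if image is None:
        return "No image captured.", None

    # Ask Gemini to describe the gesture in the captured frame.
    model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
    prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
    response = model.generate_content([prompt, image])
    text_response = response.text.strip() if response.text else ""

    if not text_response:
        return "Could not interpret the sign.", None

    # gTTS produces MP3 bytes; writing them to a temp file gives gr.Audio
    # a filepath it accepts (assumption: the committed file may hand the
    # BytesIO over differently).
    tts = gTTS(text=text_response, lang='en', slow=False)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        tts.write_to_fp(fp)
        audio_path = fp.name

    return text_response, audio_path

demo = gr.Interface(
    fn=interpret_sign,
    inputs=gr.Image(source="webcam", type="pil"),  # Gradio 3.x; 4.x uses sources=["webcam"]
    outputs=[
        gr.Textbox(label="Interpretation"),
        gr.Audio(label="Audio Interpretation"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Running a sketch like this locally needs pip install google-generativeai gradio gtts pillow and the API key exported in the environment.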