iSushant committed
Commit d619bf0 · verified · 1 Parent(s): 07cedda

Update app.py

Files changed (1)
app.py +2 -4
app.py CHANGED
@@ -4,7 +4,7 @@ from PIL import Image
 from gtts import gTTS
 import io
 
-# Configure Gemini API key
+# Gemini API key
 genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")
 
 def get_gemini_context():
@@ -39,7 +39,6 @@ def interpret_sign(image):
     if image is None:
         return "No image captured.", None
 
-    # Gemini model
     model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
     prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
     response = model.generate_content([prompt, image])
@@ -48,7 +47,6 @@ def interpret_sign(image):
     if not text_response:
         return "Could not interpret the sign.", None
 
-    # Generate TTS audio
     tts = gTTS(text=text_response, lang='en', slow=False)
     audio_fp = io.BytesIO()
     tts.write_to_fp(audio_fp)
@@ -62,7 +60,7 @@ def interpret_sign(image):
 # Gradio Interface
 demo = gr.Interface(
     fn=interpret_sign,
-    inputs=gr.Camera(label="Capture Sign Image", type="pil"),
+    inputs=gr.Image(source="webcam", type="pil"),
     outputs=[
         gr.Textbox(label="Interpretation"),
         gr.Audio(label="Audio Interpretation")
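
For reference, a minimal sketch of the updated interface wiring. It assumes Gradio 3.x, where gr.Image accepts source="webcam" as used in this commit; on Gradio 4+ the equivalent parameter is sources=["webcam"]. The interpret_sign body here is a placeholder standing in for the Gemini and gTTS logic shown in the diff above, not the author's full implementation.

# Sketch of the webcam input wiring, assuming Gradio 3.x (source="webcam").
# On Gradio 4+, use gr.Image(sources=["webcam"], type="pil") instead.
import gradio as gr

def interpret_sign(image):
    # Placeholder for the Gemini interpretation + gTTS audio step in the diff.
    if image is None:
        return "No image captured.", None
    return "Example interpretation.", None

demo = gr.Interface(
    fn=interpret_sign,
    # gr.Camera is not a standard Gradio component, which is the apparent
    # reason for switching to a webcam-sourced gr.Image input.
    inputs=gr.Image(source="webcam", type="pil"),
    outputs=[
        gr.Textbox(label="Interpretation"),
        gr.Audio(label="Audio Interpretation"),
    ],
)

if __name__ == "__main__":
    demo.launch()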