iSushant committed
Commit 9963dcc · verified · 1 Parent(s): d619bf0

Update app.py

Files changed (1)
  1. app.py +27 -28
app.py CHANGED
@@ -1,10 +1,11 @@
 import gradio as gr
 import google.generativeai as genai
 from PIL import Image
-from gtts import gTTS
 import io
+from gtts import gTTS
+import base64
 
-# Gemini API key
+# Configure Gemini API
 genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")
 
 def get_gemini_context():
@@ -16,58 +17,56 @@ def get_gemini_context():
     4. Provide clear, direct interpretation
     5. If unsure, describe the hand position/gesture seen
     Keep responses brief and focused on the sign's meaning.
-    """
+    """
 
 def get_gemini_response_text(response):
-    if response.prompt_feedback and response.prompt_feedback.block_reason:
+    if hasattr(response, 'prompt_feedback') and response.prompt_feedback and getattr(response.prompt_feedback, 'block_reason', None):
         return None
-
-    if response.candidates:
+    if hasattr(response, 'candidates'):
         for candidate in response.candidates:
-            if candidate.content and candidate.content.parts:
+            if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts'):
                 full_text = "".join(part.text for part in candidate.content.parts if hasattr(part, 'text'))
                 if full_text.strip():
                     return full_text
-
     if hasattr(response, 'text') and response.text and response.text.strip():
         return response.text
-
     return None
 
-def interpret_sign(image):
+def interpret_sign(image: Image.Image):
     try:
-        if image is None:
-            return "No image captured.", None
-
+        # Get Gemini interpretation
         model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
         prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
         response = model.generate_content([prompt, image])
-
         text_response = get_gemini_response_text(response)
         if not text_response:
             return "Could not interpret the sign.", None
 
+        # Generate audio
        tts = gTTS(text=text_response, lang='en', slow=False)
         audio_fp = io.BytesIO()
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
-
-        return text_response, (audio_fp, "speech.mp3")
-
+        audio_bytes = audio_fp.read()
+        return text_response, (audio_bytes, "audio/mp3")
     except Exception as e:
         return f"Error: {str(e)}", None
 
-# Gradio Interface
-demo = gr.Interface(
-    fn=interpret_sign,
-    inputs=gr.Image(source="webcam", type="pil"),
-    outputs=[
-        gr.Textbox(label="Interpretation"),
-        gr.Audio(label="Audio Interpretation")
-    ],
-    title="Sign Language Interpreter",
-    description="Capture an image using your webcam. The app interprets the sign and reads it aloud."
-)
+with gr.Blocks(title="Sign Language Interpreter") as demo:
+    gr.Markdown("# Sign Language Interpreter\nUpload or capture an image of a sign language gesture to get an interpretation and audio.")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(label="Sign Image", sources=["upload", "webcam"], type="pil")
+            submit_btn = gr.Button("Interpret Sign")
+        with gr.Column():
+            text_output = gr.Textbox(label="Interpretation")
+            audio_output = gr.Audio(label="Audio", type="filepath")
+
+    submit_btn.click(
+        interpret_sign,
+        inputs=image_input,
+        outputs=[text_output, audio_output]
+    )
 
 if __name__ == "__main__":
     demo.launch()
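
A note on the configuration line above: the commit keeps a Gemini API key hardcoded in app.py, so the key now lives in the repository history and should be treated as leaked and rotated. A minimal sketch of the usual alternative, assuming the key is supplied through an environment variable (the GOOGLE_API_KEY name is an illustrative choice, not something this commit defines):

    import os

    import google.generativeai as genai

    # Hypothetical env var name; use whatever the deployment exports.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("GOOGLE_API_KEY is not set")
    genai.configure(api_key=api_key)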
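
The hardened get_gemini_response_text can be exercised without calling the API at all. A small sketch using types.SimpleNamespace to fake the attribute layout the function probes for (this mimics only the shape the function expects, not the SDK's real response classes):

    from types import SimpleNamespace

    fake_part = SimpleNamespace(text="Thumbs up: a common sign of approval.")
    fake_response = SimpleNamespace(
        prompt_feedback=None,  # falsy, so the block-reason check is skipped
        candidates=[SimpleNamespace(content=SimpleNamespace(parts=[fake_part]))],
    )

    print(get_gemini_response_text(fake_response))
    # -> Thumbs up: a common sign of approval.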
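
One mismatch in the new wiring is worth flagging: interpret_sign now returns (audio_bytes, "audio/mp3"), but the output component is declared as gr.Audio(type="filepath"), whose accepted return values are a path to an audio file or a (sample_rate, numpy_array) tuple, not raw bytes plus a MIME string. (The newly added import base64 is also unused.) A minimal reconciliation sketch, assuming it is acceptable to write the gTTS output to a temporary .mp3 file; the text_to_speech_file helper is hypothetical:

    import tempfile

    from gtts import gTTS

    def text_to_speech_file(text: str) -> str:
        """Render text to a temporary MP3 file and return its path."""
        tts = gTTS(text=text, lang='en', slow=False)
        # delete=False keeps the file on disk so Gradio can serve it.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
            tts.write_to_fp(fp)
            return fp.name

With that helper, interpret_sign would end with return text_response, text_to_speech_file(text_response), matching the declared output type.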