Update app.py
app.py CHANGED

@@ -4,7 +4,7 @@ from PIL import Image
 from gtts import gTTS
 import io
 
-#
+# Gemini API key
 genai.configure(api_key="AIzaSyB6JYzYNfi8ak7g6526raHQ08YPMiC5Wic")
 
 def get_gemini_context():
@@ -39,7 +39,6 @@ def interpret_sign(image):
     if image is None:
         return "No image captured.", None
 
-    # Gemini model
     model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
     prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
     response = model.generate_content([prompt, image])
@@ -48,7 +47,6 @@ def interpret_sign(image):
     if not text_response:
         return "Could not interpret the sign.", None
 
-    # Generate TTS audio
     tts = gTTS(text=text_response, lang='en', slow=False)
     audio_fp = io.BytesIO()
     tts.write_to_fp(audio_fp)
@@ -62,7 +60,7 @@ def interpret_sign(image):
 # Gradio Interface
 demo = gr.Interface(
     fn=interpret_sign,
-    inputs=gr.
+    inputs=gr.Image(source="webcam", type="pil"),
     outputs=[
         gr.Textbox(label="Interpretation"),
         gr.Audio(label="Audio Interpretation")
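For orientation, below is a minimal sketch of how the pieces touched by this commit fit together end to end. It is not the committed file: the body of get_gemini_context() and the function's return statement are not visible in the diff, so the stub context string, the environment-variable key lookup (GOOGLE_API_KEY), and the temp-file handoff to gr.Audio are assumptions. Note that source="webcam" is the Gradio 3.x keyword; Gradio 4.x renamed it to sources=["webcam"].

import os
import tempfile

import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from PIL import Image

# Assumption: key read from the environment instead of being hardcoded.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

def get_gemini_context():
    # Placeholder: the real body is not shown in the diff.
    return "You are an assistant that interprets sign language and hand gestures."

def interpret_sign(image: Image.Image):
    if image is None:
        return "No image captured.", None

    # Ask Gemini to describe the gesture in the captured frame.
    model = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')
    prompt = f"{get_gemini_context()}\n\nWhat sign or gesture is being shown in this image? Provide a clear, concise interpretation."
    response = model.generate_content([prompt, image])
    text_response = response.text.strip() if response.text else ""

    if not text_response:
        return "Could not interpret the sign.", None

    # gTTS produces MP3 bytes; writing them to a temp file gives gr.Audio
    # a filepath it accepts (assumption: the committed file may hand the
    # BytesIO over differently).
    tts = gTTS(text=text_response, lang='en', slow=False)
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as fp:
        tts.write_to_fp(fp)
        audio_path = fp.name

    return text_response, audio_path

demo = gr.Interface(
    fn=interpret_sign,
    inputs=gr.Image(source="webcam", type="pil"),  # Gradio 3.x; 4.x uses sources=["webcam"]
    outputs=[
        gr.Textbox(label="Interpretation"),
        gr.Audio(label="Audio Interpretation"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Running a sketch like this locally needs pip install google-generativeai gradio gtts pillow and the API key exported in the environment.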