# blind_vision / app.py
# Source: Hugging Face Space by adil9858 (commit 93f1bb6).
# NOTE: the lines above this file's imports were page-scrape residue
# ("raw / history / blame / 2.84 kB") and have been converted to comments
# so the module is valid Python.
# Standard library
import base64
import io
import os
import uuid

# Third-party
import streamlit as st
from openai import OpenAI
from PIL import Image
from gtts import gTTS
import cv2
import numpy as np
# --- Configuration ---
# SECURITY FIX: an OpenRouter API key was previously hard-coded here and
# committed to a public repository — treat that key as compromised and
# rotate it. The key is now read from the environment instead.
API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# OpenRouter exposes an OpenAI-compatible API, so the official OpenAI SDK
# works once pointed at OpenRouter's base URL.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
)
# --- Helper Functions ---
def describe_image(image_url):
    """Return the vision model's textual description of the image at *image_url*.

    Sends a single multimodal chat request (instruction text plus the image
    URL) to the InternVL model via the module-level OpenRouter ``client``.
    The reply covers objects, scene, visible text, and hazard warnings
    aimed at a visually impaired user.
    """
    instruction = (
        "Describe this image clearly, including objects, scene, and any visible text."
        " Also warn about potential hazards like wet floors, stairs, obstacles."
    )
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": instruction},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    completion = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[user_message],
    )
    return completion.choices[0].message.content
def speak(text, filename=None):
    """Synthesize *text* to an English MP3 file via gTTS.

    When *filename* is falsy, a unique ``audio_<uuid>.mp3`` name is
    generated so concurrent sessions never collide. Returns the path of
    the saved MP3.
    """
    target = filename or f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(target)
    return target
def image_to_array(uploaded_image):
    """Decode an uploaded image file into an RGB numpy array of shape (H, W, 3)."""
    rgb_image = Image.open(uploaded_image).convert('RGB')  # force 3 channels
    return np.array(rgb_image)
def array_to_base64(img_array):
    """Encode an image array as a ``data:image/jpeg;base64,`` data URL.

    BUG FIX: the previous version used ``bytes.hex()``, which produces
    *hexadecimal*, not base64 — anything decoding the declared base64
    scheme would fail. It also discarded ``cv2.imencode``'s success flag.

    Raises:
        ValueError: if JPEG encoding fails.
    """
    ok, buffer = cv2.imencode('.jpg', img_array)
    if not ok:
        raise ValueError("JPEG encoding failed")
    payload = base64.b64encode(buffer.tobytes()).decode("ascii")
    return "data:image/jpeg;base64," + payload
# --- Streamlit UI ---
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("👁️ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")
st.subheader("📸 Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")
if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_column_width=True)
    with st.spinner("Analyzing the scene..."):
        # BUG FIX: the previous version saved the captured frame to disk but
        # sent a hard-coded Wikipedia URL to the model, so the description
        # never matched the user's scene. Instead, embed the captured frame
        # directly as a base64 data URL — no cloud upload or temp file needed.
        pil_img = Image.open(camera_image).convert("RGB")
        jpeg_buf = io.BytesIO()
        pil_img.save(jpeg_buf, format="JPEG")
        image_url = "data:image/jpeg;base64," + base64.b64encode(
            jpeg_buf.getvalue()
        ).decode("ascii")
        description = describe_image(image_url)
    st.subheader("📝 Description")
    st.write(description)
    st.subheader("🔊 Audio Narration")
    audio_file = speak(description)
    try:
        # BUG FIX: read via a context manager (the handle was leaked before).
        with open(audio_file, 'rb') as fh:
            audio_bytes = fh.read()
        st.audio(audio_bytes, format='audio/mp3')
    finally:
        # Remove the temporary MP3 even if rendering the audio widget fails.
        os.remove(audio_file)
st.markdown("---")
st.markdown("*Built with 💡 using Streamlit, OpenRouter, and gTTS.*")