# AI Visual Assistant for the Blind — Streamlit app
# (camera capture -> vision-model description -> text-to-speech narration)
import base64
import io
import os
import uuid

import cv2
import numpy as np
import streamlit as st
from gtts import gTTS
from openai import OpenAI
from PIL import Image
# --- Configuration ---
# SECURITY FIX: the original committed a live OpenRouter API key to source.
# A key that has appeared in a repository must be treated as compromised and
# rotated. Read the (new) key from the environment instead of hard-coding it.
API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# OpenRouter exposes an OpenAI-compatible endpoint, so the standard OpenAI
# client works with only the base_url swapped.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY
)
# --- Helper Functions --- | |
def describe_image(image_url):
    """Return the vision model's textual description of *image_url*.

    Sends a single multimodal (text + image) chat turn to the InternVL
    model via OpenRouter. The prompt asks for objects, scene, visible
    text, and warnings about hazards such as wet floors or stairs.
    """
    user_turn = {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles.",
            },
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    completion = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[user_turn],
    )
    return completion.choices[0].message.content
def speak(text, filename=None):
    """Synthesize *text* to an English MP3 file and return its path.

    When *filename* is falsy, a unique UUID-based name is generated so
    concurrent calls never clobber each other's output.
    """
    target = filename or f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(target)
    return target
def image_to_array(uploaded_image):
    """Decode an uploaded image file into an RGB numpy array (H, W, 3)."""
    # Force RGB so downstream consumers always see exactly 3 channels,
    # regardless of the source being grayscale, RGBA, palette, etc.
    rgb_image = Image.open(uploaded_image).convert('RGB')
    return np.array(rgb_image)
def array_to_base64(img_array):
    """Encode an image array as a JPEG ``data:`` URL.

    Parameters
    ----------
    img_array : numpy.ndarray
        Image in a layout ``cv2.imencode`` accepts (H x W x 3, uint8).
        NOTE(review): OpenCV assumes BGR channel order — RGB input will
        come out with red/blue swapped; convert with cvtColor first.

    Returns
    -------
    str
        ``data:image/jpeg;base64,<payload>`` string.

    Raises
    ------
    ValueError
        If JPEG encoding fails.

    BUG FIX: the original returned ``buffer.tobytes().hex()`` — a hex dump,
    not base64 — under a prefix that promises ``;base64,``. Any consumer of
    the data URL would fail to decode it. Use ``base64.b64encode`` as the
    marker requires.
    """
    ok, buffer = cv2.imencode('.jpg', img_array)
    if not ok:
        raise ValueError("JPEG encoding failed")
    payload = base64.b64encode(buffer.tobytes()).decode('ascii')
    return "data:image/jpeg;base64," + payload
# --- Streamlit UI ---
# Flat script flow: capture a frame -> describe it -> narrate the description.
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("👁️ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("📸 Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")

if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_column_width=True)

    with st.spinner("Analyzing the scene..."):
        # BUG FIX: the original saved the frame to a temp file, then sent a
        # hard-coded Wikipedia URL to the model — the captured image was
        # never analyzed. st.camera_input returns the frame as JPEG bytes,
        # so embed them directly as a base64 data URL (no temp file needed).
        jpeg_bytes = camera_image.getvalue()
        image_url = (
            "data:image/jpeg;base64,"
            + base64.b64encode(jpeg_bytes).decode("ascii")
        )
        description = describe_image(image_url)

    st.subheader("📝 Description")
    st.write(description)

    st.subheader("🔊 Audio Narration")
    audio_file = speak(description)
    # Context manager fixes the original's leaked file handle.
    with open(audio_file, 'rb') as fh:
        audio_bytes = fh.read()
    st.audio(audio_bytes, format='audio/mp3')

    # Remove the temporary narration file once it has been read.
    os.remove(audio_file)

st.markdown("---")
st.markdown("*Built with 💡 using Streamlit, OpenRouter, and gTTS.*")