Spaces:
Sleeping
Sleeping
File size: 2,836 Bytes
a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf 93f1bb6 71002bf a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf a6c1838 71002bf 5d47b99 71002bf a6c1838 71002bf 5d47b99 71002bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import base64
import io
import os
import uuid

import cv2
import numpy as np
import streamlit as st
from gtts import gTTS
from openai import OpenAI
from PIL import Image
# --- Configuration ---
# SECURITY FIX: the original committed a live OpenRouter API key in source.
# A key published in a repo must be treated as compromised (revoke it) —
# read the credential from the environment instead of hardcoding it.
API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# OpenRouter exposes an OpenAI-compatible endpoint, so the OpenAI client
# works unchanged once pointed at the OpenRouter base URL.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY,
)
# --- Helper Functions ---
def describe_image(image_url):
    """Return the vision model's textual description of the image at *image_url*.

    Sends one multimodal chat request (text prompt + image URL) to the
    OpenRouter-hosted InternVL model and returns the reply content string.
    """
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    completion = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[user_message],
    )
    return completion.choices[0].message.content
def speak(text, filename=None):
    """Synthesize *text* to an MP3 file via gTTS and return its path.

    When *filename* is falsy, a unique name is generated so concurrent
    calls do not overwrite each other's audio.
    """
    target = filename if filename else f"audio_{uuid.uuid4()}.mp3"
    gTTS(text=text, lang='en').save(target)
    return target
def image_to_array(uploaded_image):
    """Decode an uploaded image file-like object into an RGB numpy array.

    The RGB conversion guarantees a 3-channel result regardless of the
    source format (grayscale, RGBA, palette, ...).
    """
    return np.array(Image.open(uploaded_image).convert('RGB'))
def array_to_base64(img_array):
    """Encode an image array as a JPEG ``data:`` URL.

    Bug fix: the original used ``bytes.hex()``, which emits hexadecimal
    digits, yet labeled the payload ``base64`` — the resulting data URL
    was invalid and no browser/decoder could render it. Proper base64
    encoding is used here. Also checks the ``cv2.imencode`` success flag
    instead of discarding it.

    Raises:
        ValueError: if JPEG encoding fails.
    """
    ok, buffer = cv2.imencode('.jpg', img_array)
    if not ok:
        raise ValueError("JPEG encoding failed")
    payload = base64.b64encode(buffer.tobytes()).decode('ascii')
    return "data:image/jpeg;base64," + payload
# --- Streamlit UI ---
st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("ποΈ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("πΈ Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")

if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_column_width=True)
    with st.spinner("Analyzing the scene..."):
        # Persist the captured frame to a uniquely named temp file.
        temp_path = f"temp_frame_{uuid.uuid4()}.jpg"
        Image.open(camera_image).convert("RGB").save(temp_path)

        # NOTE(review): the captured frame is never actually sent to the
        # model — a fixed demo URL stands in for it. In production, upload
        # temp_path to cloud storage and pass its public URL here.
        image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
        description = describe_image(image_url)

        st.subheader("π Description")
        st.write(description)

        st.subheader("π Audio Narration")
        audio_file = speak(description)
        # Fix: close the audio file handle (the original leaked it, which
        # can keep the file locked on Windows and break os.remove below).
        with open(audio_file, 'rb') as fh:
            audio_bytes = fh.read()
        st.audio(audio_bytes, format='audio/mp3')

        # Clean up temporary artifacts once they are no longer needed.
        os.remove(temp_path)
        os.remove(audio_file)

st.markdown("---")
st.markdown("*Built with π‘ using Streamlit, OpenRouter, and gTTS.*")