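"""AI Visual Assistant for the Blind.

Captures a frame from the user's camera in Streamlit, asks a vision model
(via OpenRouter) to describe the scene and flag hazards, then narrates the
description aloud with gTTS.

Run locally (assuming this file is saved as app.py):
    pip install streamlit openai pillow gtts opencv-python numpy
    streamlit run app.py
"""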
import streamlit as st
from openai import OpenAI
from PIL import Image
import base64
import os
import uuid
from gtts import gTTS
import cv2
import numpy as np

# --- Configuration ---
# Read the OpenRouter API key from the environment rather than hard-coding
# a secret in the source.
API_KEY = os.environ.get("OPENROUTER_API_KEY")

# OpenRouter exposes an OpenAI-compatible API, so the standard OpenAI client
# works with only a different base_url.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=API_KEY
)

# --- Helper Functions ---

def describe_image(image_url):
    """Ask a vision model (via OpenRouter) to describe an image.

    `image_url` may be an http(s) URL or a base64 data URL.
    """
    response = client.chat.completions.create(
        model="opengvlab/internvl3-14b:free",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image clearly, including objects, scene, and any visible text. Also warn about potential hazards like wet floors, stairs, obstacles."},
                    {"type": "image_url", "image_url": {"url": image_url}}
                ]
            }
        ]
    )
    return response.choices[0].message.content

def speak(text, filename=None):
    """Synthesize `text` to an MP3 file with gTTS and return its path."""
    if not filename:
        filename = f"audio_{uuid.uuid4()}.mp3"
    tts = gTTS(text=text, lang='en')
    tts.save(filename)
    return filename

def image_to_array(uploaded_image):
    """Load an uploaded image file into an RGB numpy array."""
    img = Image.open(uploaded_image)
    img = img.convert('RGB')  # Ensure 3 channels
    return np.array(img)

def array_to_base64(img_array):
    """JPEG-encode an RGB array and return it as a base64 data URL."""
    # OpenCV assumes BGR channel order, so convert from RGB first.
    bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    _, buffer = cv2.imencode('.jpg', bgr)
    return "data:image/jpeg;base64," + base64.b64encode(buffer.tobytes()).decode('ascii')

# --- Streamlit UI ---

st.set_page_config(page_title="AI Visual Assistant for the Blind", layout="centered")
st.title("πŸ‘οΈ AI Visual Assistant for the Blind")
st.markdown("Use your **camera** to capture the world around you.")

st.subheader("πŸ“Έ Take a Picture")
camera_image = st.camera_input("Capture a frame from your camera")
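# st.camera_input returns an UploadedFile-like object (or None until the
# user captures a frame), so it can be opened directly with PIL.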

if camera_image is not None:
    st.image(camera_image, caption="Captured Frame", use_container_width=True)

    with st.spinner("Analyzing the scene..."):
        # Encode the captured frame as a base64 data URL. OpenRouter accepts
        # data URLs in image_url content, so no intermediate upload is needed.
        img_array = image_to_array(camera_image)
        image_url = array_to_base64(img_array)

        description = describe_image(image_url)

        st.subheader("πŸ“ Description")
        st.write(description)

        st.subheader("πŸ”Š Audio Narration")
        audio_file = speak(description)
        with open(audio_file, 'rb') as f:
            audio_bytes = f.read()
        st.audio(audio_bytes, format='audio/mp3')
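        # Recent Streamlit releases also accept autoplay=True on st.audio,
        # which starts the narration without an extra tap.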

        # Cleanup: remove the temporary narration file after playback.
        os.remove(audio_file)

st.markdown("---")
st.markdown("*Built with πŸ’‘ using Streamlit, OpenRouter, and gTTS.*")