File size: 5,805 Bytes
3852507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import os
import streamlit as st
from ultralytics import YOLO
import cv2
import random
import time
from gtts import gTTS
import pygame
import threading
from datetime import datetime, timedelta

# Reset and (re)initialize the pygame mixer. The explicit quit() first makes
# sure a mixer left initialized by an earlier execution of this script is
# fully stopped before init() is called again.
pygame.mixer.quit()
pygame.mixer.init()

# Load the YOLOv8 model used by process_frame().
yolo = YOLO("yolov8n.pt")

# Streamlit app layout
st.set_page_config(page_title="Assistive Vision App", layout="centered")
st.title("Assistive Vision App for Visual Impairments")
st.write("This application provides real-time object recognition and optional audio alerts.")

# Directory holding the temporary MP3 files written by play_audio_alert().
# exist_ok=True creates it when missing without a separate existence check,
# so there is no window between "check" and "create".
audio_temp_dir = "audio_temp_files"
os.makedirs(audio_temp_dir, exist_ok=True)

# Placeholder that the detection loop overwrites with each processed frame.
stframe = st.empty()

# User controls
start_detection = st.button("Start Detection")
stop_detection = st.button("Stop Detection")
audio_activation = st.checkbox("Enable Audio Alerts", value=False)

# Labels that trigger audio alerts (hazardous objects or living things).
# They are compared against the label names reported by the model.
# NOTE(review): presumably some of these (e.g. "fire", "gun") are not labels
# produced by the stock yolov8n.pt weights -- confirm against the model's
# class list.
alert_categories = {"person", "cat", "dog", "knife", "fire", "gun"}

# Maps each label to the datetime of its most recent audio alert; used
# together with alert_cooldown to avoid repeating the same alert too often.
last_alert_time = {}
alert_cooldown = timedelta(seconds=10)  # 10-second cooldown for alerts


def play_audio_alert(label, position):
    """Generate and play a spoken alert for a detected object.

    One of three caution phrases is chosen at random, synthesized with gTTS
    into a uniquely named MP3 inside ``audio_temp_dir`` and played through
    the pygame mixer. A daemon thread deletes the MP3 once the mixer is no
    longer busy.

    Every failure (speech synthesis, file I/O, playback, starting the
    cleanup thread) is reported with ``print`` and swallowed, so a failed
    alert never propagates into the caller. The temporary file is removed
    on failure as well.

    Args:
        label: Name of the detected object; interpolated into the phrase.
        position: Side the object is on; interpolated into the phrase.
    """
    phrases = [
        f"Be careful, there's a {label} on your {position}.",
        f"Watch out! {label} detected on your {position}.",
        f"Alert! A {label} is on your {position}.",
    ]
    caution_note = random.choice(phrases)

    # Microsecond-resolution timestamp keeps filenames unique across alerts.
    temp_file_path = os.path.join(audio_temp_dir, f"temp_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}.mp3")

    def remove_temp_file():
        """Delete the temporary MP3, tolerating a file that was never written."""
        try:
            os.remove(temp_file_path)
        except FileNotFoundError:
            # Synthesis failed before anything was written; nothing to clean.
            pass
        except OSError as e:
            print(f"Error deleting file {temp_file_path}: {e}")

    def cleanup_audio_file():
        """Wait for playback to finish, then delete the temporary MP3."""
        try:
            while pygame.mixer.music.get_busy():  # Wait until audio finishes playing
                time.sleep(0.1)

            # Explicitly stop the music before removing the file.
            pygame.mixer.music.stop()
        except pygame.error as e:
            # The mixer can be shut down elsewhere while this thread is still
            # polling; playback is over in that case, so just clean up.
            print(f"Audio mixer unavailable during cleanup: {e}")

        remove_temp_file()

    try:
        # Synthesis is inside the try block: gTTS needs network access and
        # save() writes to disk, and neither failure should reach the caller.
        gTTS(caution_note).save(temp_file_path)

        # Load and play the audio using pygame
        pygame.mixer.music.load(temp_file_path)
        pygame.mixer.music.play()

        # Hand file removal to a background thread so the caller is not
        # blocked for the duration of the playback.
        threading.Thread(target=cleanup_audio_file, daemon=True).start()
    except Exception as e:
        print(f"Error playing audio alert: {e}")
        # No cleanup thread is running for this file, so remove it here.
        remove_temp_file()


def process_frame(frame, audio_mode):
    """Run object detection on one frame and annotate the kept detections.

    The module-level ``yolo`` model is called on ``frame`` and only its first
    result is used. For every kept box a green rectangle and the label text
    are drawn onto ``frame``.

    When ``audio_mode`` is truthy, boxes whose label is not in
    ``alert_categories`` are skipped entirely: they are neither drawn nor
    reported.

    Args:
        frame: Image to analyse; ``frame.shape[1]`` is used as its width.
        audio_mode: Whether to restrict detections to ``alert_categories``.

    Returns:
        A ``(detected_objects, frame)`` tuple. ``detected_objects`` maps each
        kept label to ``"left"`` or ``"right"`` depending on which side of
        the horizontal centre the box centre falls on; when several boxes
        share a label, the last one processed wins. ``frame`` is the same
        object that was passed in.
    """
    green = (0, 255, 0)
    detection = yolo(frame)[0]

    # The horizontal midpoint is the same for every box, so compute it once.
    midpoint_x = frame.shape[1] // 2

    sides_by_label = {}
    for det in detection.boxes:
        left, top, right, bottom = (int(coord) for coord in det.xyxy[0])
        name = detection.names[int(det.cls[0])]

        # In audio mode only alert-worthy labels are kept.
        if audio_mode and name not in alert_categories:
            continue

        # Classify the box by the side its centre falls on.
        box_mid_x = (left + right) // 2
        if box_mid_x < midpoint_x:
            side = "left"
        else:
            side = "right"
        sides_by_label[name] = side

        # Outline the box and write its label just above the top-left corner.
        cv2.rectangle(frame, (left, top), (right, bottom), green, 2)
        cv2.putText(frame, f"{name}", (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

    return sides_by_label, frame


# Main logic
# This branch runs when the "Start Detection" button reported a click for the
# current execution of the script. It opens the webcam, then repeatedly
# grabs a frame, runs detection on it, shows the annotated frame in the
# `stframe` placeholder and, if enabled, plays rate-limited audio alerts.
if start_detection:
    st.success("Object detection started.")
    try:
        # Open capture device index 0.
        video_capture = cv2.VideoCapture(0)
        if not video_capture.isOpened():
            st.error("Could not access the webcam. Please check your camera settings.")
        else:
            # NOTE(review): stop_detection is assigned once, before this
            # branch, and is never reassigned inside the loop, so this
            # condition cannot change while looping. Presumably a click on
            # "Stop Detection" ends the loop because Streamlit interrupts and
            # reruns the script -- confirm.
            while not stop_detection:
                ret, frame = video_capture.read()
                if not ret:
                    # read() reported failure; report it and leave the loop.
                    st.error("Failed to capture video. Please check your camera.")
                    break

                # Process the frame based on whether audio alerts are enabled
                detected_objects, processed_frame = process_frame(frame, audio_activation)

                # Convert BGR to RGB for Streamlit
                frame_rgb = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)
                stframe.image(frame_rgb, channels="RGB", use_container_width=True)

                # Generate audio alerts for detected objects
                if audio_activation:
                    current_time = datetime.now()
                    for label, position in detected_objects.items():
                        # Check if enough time has passed since the last alert for this object
                        # (a label that has never been alerted is always eligible).
                        if (
                            label not in last_alert_time
                            or current_time - last_alert_time[label] > alert_cooldown
                        ):
                            play_audio_alert(label, position)
                            last_alert_time[label] = current_time  # Update the last alert time

                time.sleep(0.1)  # Small delay for smoother updates

    except Exception as e:
        # Surface any error from capture, detection or rendering in the UI.
        st.error(f"An error occurred: {e}")
    finally:
        # The `in locals()` test covers the case where cv2.VideoCapture()
        # itself raised before the name was bound.
        # NOTE: destroyAllWindows() and pygame.mixer.quit() sit inside this
        # `if`, so they are skipped when the capture was never opened.
        # NOTE(review): quitting the mixer here may cut off an alert that is
        # still playing -- confirm this is intended.
        if 'video_capture' in locals() and video_capture.isOpened():
            video_capture.release()
            cv2.destroyAllWindows()
            pygame.mixer.quit()

# Runs when "Stop Detection" was the button clicked for this execution.
elif stop_detection:
    st.warning("Object detection stopped.")