Guru-25 committed
Commit b7c0c37 · verified · 1 Parent(s): 5f7e302
Files changed (2):
  1. app.py +125 -98
  2. requirements.txt +1 -3
app.py CHANGED
@@ -4,12 +4,14 @@ import numpy as np
 import tempfile
 import os
 import time
+import spaces
 from scripts.inference import GazePredictor
 from utils.ear_utils import BlinkDetector
 from gradio_webrtc import WebRTC
 from ultralytics import YOLO
 import torch
-import spaces # Add spaces import
+import json
+import requests

 def smooth_values(history, current_value, window_size=5):
     if current_value is not None:
@@ -34,12 +36,45 @@ def smooth_values(history, current_value, window_size=5):
     else:
         return history[-1] if history else None

+# --- Configure Twilio TURN servers for WebRTC ---
+def get_twilio_turn_credentials():
+    # Replace with your Twilio credentials or set as environment variables
+    twilio_account_sid = os.environ.get("TWILIO_ACCOUNT_SID", "")
+    twilio_auth_token = os.environ.get("TWILIO_AUTH_TOKEN", "")
+
+    if not twilio_account_sid or not twilio_auth_token:
+        print("Warning: Twilio credentials not found. Using default RTCConfiguration.")
+        return None
+
+    try:
+        response = requests.post(
+            f"https://api.twilio.com/2010-04-01/Accounts/{twilio_account_sid}/Tokens.json",
+            auth=(twilio_account_sid, twilio_auth_token),
+        )
+        data = response.json()
+        return data["ice_servers"]
+    except Exception as e:
+        print(f"Error fetching Twilio TURN credentials: {e}")
+        return None
+
+# Configure WebRTC
+ice_servers = get_twilio_turn_credentials()
+if ice_servers:
+    rtc_configuration = {"iceServers": ice_servers}
+else:
+    rtc_configuration = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+
 # --- Model Paths ---
 GAZE_MODEL_PATH = os.path.join("models", "gaze_estimation_model.pth")
 DISTRACTION_MODEL_PATH = "best.pt"

 # --- Global Initializations ---
-blink_detector = BlinkDetector() # Keep BlinkDetector global as it is CPU-only
+gaze_predictor = GazePredictor(GAZE_MODEL_PATH)
+blink_detector = BlinkDetector()
+
+# Load Distraction Model
+distraction_model = YOLO(DISTRACTION_MODEL_PATH)
+distraction_model.to('cpu')

 # Distraction Class Names
 distraction_class_names = [
@@ -71,12 +106,10 @@ EYE_CLOSURE_THRESHOLD = 10
 HEAD_STABILITY_THRESHOLD = 0.05
 DISTRACTION_CONF_THRESHOLD = 0.1

-@spaces.GPU
 def analyze_video(input_video):
-    local_gaze_predictor = GazePredictor(GAZE_MODEL_PATH, device='cuda') # Load directly to CUDA
-    local_blink_detector = blink_detector # Use global CPU instance
-
     cap = cv2.VideoCapture(input_video)
+    local_gaze_predictor = GazePredictor(GAZE_MODEL_PATH)
+    local_blink_detector = BlinkDetector()
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     temp_fd, temp_path = tempfile.mkstemp(suffix='.mp4')
     os.close(temp_fd)
@@ -200,11 +233,8 @@ def analyze_video(input_video):
     out.release()
     return temp_path

-@spaces.GPU
+@spaces.GPU(duration=30)  # Set duration to 30 seconds for real-time processing
 def analyze_distraction_video(input_video):
-    local_distraction_model = YOLO(DISTRACTION_MODEL_PATH)
-    local_distraction_model.to('cuda') # Move to GPU
-
     cap = cv2.VideoCapture(input_video)
     if not cap.isOpened():
         print("Error: Could not open video file.")
@@ -223,7 +253,7 @@ def analyze_distraction_video(input_video):
             break

         try:
-            results = local_distraction_model(frame, conf=DISTRACTION_CONF_THRESHOLD, verbose=False)
+            results = distraction_model(frame, conf=DISTRACTION_CONF_THRESHOLD, verbose=False)

             display_text = "safe driving"
             alarm_action = None
@@ -272,6 +302,73 @@ def analyze_distraction_video(input_video):
     out.release()
     return temp_path

+@spaces.GPU(duration=30)  # Set duration to 30 seconds for real-time processing
+def process_distraction_frame(frame):
+    global stop_distraction_processing
+
+    if stop_distraction_processing:
+        return np.zeros((480, 640, 3), dtype=np.uint8)
+
+    if frame is None:
+        return np.zeros((480, 640, 3), dtype=np.uint8)
+
+    try:
+        # Run distraction detection model
+        results = distraction_model(frame, conf=DISTRACTION_CONF_THRESHOLD, verbose=False)
+
+        display_text = "safe driving"
+        alarm_action = None
+
+        for result in results:
+            if result.boxes is not None and len(result.boxes) > 0:
+                boxes = result.boxes.xyxy.cpu().numpy()
+                scores = result.boxes.conf.cpu().numpy()
+                classes = result.boxes.cls.cpu().numpy()
+
+                if len(boxes) > 0:
+                    # Draw bounding boxes
+                    for i, box in enumerate(boxes):
+                        x1, y1, x2, y2 = map(int, box)
+                        cls_id = int(classes[i])
+                        confidence = scores[i]
+
+                        if 0 <= cls_id < len(distraction_class_names):
+                            action = distraction_class_names[cls_id]
+                            color = (0, 255, 0) if action == "safe driving" else (0, 0, 255)
+                            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
+                            cv2.putText(frame, f"{action} {confidence:.2f}", (x1, y1-10),
+                                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+
+                            # Select highest confidence detection for status
+                            if i == scores.argmax():
+                                detected_action = action
+                                confidence_score = confidence
+                                display_text = f"{detected_action}: {confidence_score:.2f}"
+                                if detected_action != 'safe driving':
+                                    alarm_action = detected_action
+                        else:
+                            print(f"Warning: Detected class index {cls_id} out of bounds.")
+                            display_text = "Unknown Detection"
+
+        if alarm_action:
+            cv2.putText(frame, f"ALERT: {alarm_action}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+        # Always show current detection status
+        text_color = (0, 255, 0) if alarm_action is None else (0, 255, 255)
+        cv2.putText(frame, display_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)
+
+        # Convert BGR to RGB for Gradio display
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        return frame_rgb
+
+    except Exception as e:
+        print(f"Error processing frame for distraction detection: {e}")
+        error_frame = np.zeros((480, 640, 3), dtype=np.uint8)
+        if not error_frame.flags.writeable:
+            error_frame = error_frame.copy()
+        cv2.putText(error_frame, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
+        return error_frame
+
 def terminate_gaze_stream():
     global gaze_history, head_history, ear_history, stable_gaze_time, stable_head_time
     global eye_closed_time, blink_count, start_time, is_unconscious, frame_count_webcam, stop_gaze_processing
@@ -292,15 +389,13 @@

 def terminate_distraction_stream():
     global stop_distraction_processing
-    print("Distraction Live Termination signal received. Stopping processing.")
+
+    print("Distraction Termination signal received. Stopping processing.")
     stop_distraction_processing = True
-    return "Distraction Live Processing Terminated."
+    return "Distraction Processing Terminated."

-@spaces.GPU
+@spaces.GPU(duration=30)  # Set duration to 30 seconds for real-time processing
 def process_gaze_frame(frame):
-    gaze_predictor_live = GazePredictor(GAZE_MODEL_PATH, device='cuda') # Load directly to CUDA
-    local_blink_detector = blink_detector # Use global CPU instance
-
     global gaze_history, head_history, ear_history, stable_gaze_time, stable_head_time
     global eye_closed_time, blink_count, start_time, is_unconscious, frame_count_webcam, stop_gaze_processing

@@ -316,8 +411,11 @@ def process_gaze_frame(frame):
         start_time = current_time

     try:
-        head_pose_gaze, gaze_h, gaze_v = gaze_predictor_live.predict_gaze(frame)
-        ear, left_eye, right_eye, head_pose, left_iris, right_iris = local_blink_detector.detect_blinks(frame)
+        head_pose_gaze, gaze_h, gaze_v = gaze_predictor.predict_gaze(frame)
+        current_gaze = np.array([gaze_h, gaze_v]) if gaze_h is not None and gaze_v is not None else None
+        smoothed_gaze = smooth_values(gaze_history, current_gaze)
+
+        ear, left_eye, right_eye, head_pose, left_iris, right_iris = blink_detector.detect_blinks(frame)

         if ear is None:
             cv2.putText(frame, "No face detected", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
@@ -409,72 +507,11 @@
         cv2.putText(error_frame, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
         return error_frame

-@spaces.GPU
-def process_distraction_frame(frame):
-    distraction_model_live = YOLO(DISTRACTION_MODEL_PATH)
-    distraction_model_live.to('cuda')
-
-    global stop_distraction_processing
-
-    if stop_distraction_processing:
-        return np.zeros((480, 640, 3), dtype=np.uint8)
-
-    if frame is None:
-        return np.zeros((480, 640, 3), dtype=np.uint8)
-
-    try:
-        frame_to_process = frame
-
-        results = distraction_model_live(frame_to_process, conf=DISTRACTION_CONF_THRESHOLD, verbose=False)
-
-        display_text = "safe driving"
-        alarm_action = None
-
-        for result in results:
-            if result.boxes is not None and len(result.boxes) > 0:
-                boxes = result.boxes.xyxy.cpu().numpy()
-                scores = result.boxes.conf.cpu().numpy()
-                classes = result.boxes.cls.cpu().numpy()
-
-                if len(boxes) > 0:
-                    max_score_idx = scores.argmax()
-                    detected_action_idx = int(classes[max_score_idx])
-                    if 0 <= detected_action_idx < len(distraction_class_names):
-                        detected_action = distraction_class_names[detected_action_idx]
-                        confidence = scores[max_score_idx]
-                        display_text = f"{detected_action}: {confidence:.2f}"
-                        if detected_action != 'safe driving':
-                            alarm_action = detected_action
-                    else:
-                        print(f"Warning: Detected class index {detected_action_idx} out of bounds.")
-                        display_text = "Unknown Detection"
-
-        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-        if alarm_action:
-            print(f"ALARM: Unsafe behavior detected - {alarm_action}!")
-            cv2.putText(frame_bgr, f"ALARM: {alarm_action}", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-
-        text_color = (0, 255, 0) if alarm_action is None else (0, 255, 255)
-        cv2.putText(frame_bgr, display_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, text_color, 2)
-
-        frame_rgb_processed = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
-        return frame_rgb_processed
-
-    except Exception as e:
-        print(f"Error processing distraction frame: {e}")
-        error_frame = np.zeros((480, 640, 3), dtype=np.uint8)
-        if not error_frame.flags.writeable:
-            error_frame = error_frame.copy()
-        error_frame_bgr = cv2.cvtColor(error_frame, cv2.COLOR_RGB2BGR)
-        cv2.putText(error_frame_bgr, f"Error: {e}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
-        error_frame_rgb = cv2.cvtColor(error_frame_bgr, cv2.COLOR_BGR2RGB)
-        return error_frame_rgb
-
 def create_gaze_interface():
     with gr.Blocks() as gaze_demo:
         gr.Markdown("## Real-time Gaze & Drowsiness Tracking")
         with gr.Row():
-            webcam_stream = WebRTC(label="Webcam Stream")
+            webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)
         with gr.Row():
             terminate_btn = gr.Button("Terminate Process")

@@ -489,20 +526,10 @@
     return gaze_demo

 def create_distraction_interface():
-    distraction_demo = gr.Interface(
-        fn=analyze_distraction_video,
-        inputs=gr.Video(sources=["upload", "webcam"], label="Input Video (Upload or Record)"),
-        outputs=gr.Video(label="Processed Video"),
-        title="Distraction Detection Analysis",
-        description="Upload or record a video to analyze driver distraction."
-    )
-    return distraction_demo
-
-def create_distraction_live_interface():
-    with gr.Blocks() as distraction_live_demo:
-        gr.Markdown("## Real-time Distraction Detection (Live)")
+    with gr.Blocks() as distraction_demo:
+        gr.Markdown("## Real-time Distraction Detection")
         with gr.Row():
-            webcam_stream = WebRTC(label="Webcam Stream")
+            webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)
         with gr.Row():
             terminate_btn = gr.Button("Terminate Process")

@@ -514,7 +541,7 @@ def create_distraction_live_interface():

         terminate_btn.click(fn=terminate_distraction_stream, inputs=None, outputs=None)

-    return distraction_live_demo
+    return distraction_demo

 def create_video_interface():
     video_demo = gr.Interface(
@@ -527,8 +554,8 @@ def create_video_interface():
     return video_demo

 demo = gr.TabbedInterface(
-    [create_video_interface(), create_gaze_interface(), create_distraction_interface(), create_distraction_live_interface()],
-    ["Gaze Video Upload", "Gaze & Drowsiness (Live)", "Distraction Video Upload", "Distraction Detection (Live)"],
+    [create_video_interface(), create_gaze_interface(), create_distraction_interface()],
+    ["Gaze Video Upload", "Gaze & Drowsiness (Live)", "Distraction Detection (Live)"],
     title="Driver Monitoring System"
 )

@@ -545,4 +572,4 @@ if __name__ == "__main__":
     frame_count_webcam = 0
     stop_gaze_processing = False
     stop_distraction_processing = False
-    demo.launch()
+    demo.launch()
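
Note on the `@spaces.GPU(duration=30)` decorators added above: on Hugging Face ZeroGPU Spaces, a GPU is attached only while a call to a decorated function is running, which is why the commit now builds the models on CPU at module level instead of calling `.to('cuda')` eagerly. A minimal sketch of the pattern, assuming ZeroGPU hardware (the function name and workload here are hypothetical, not from this commit):

import spaces
import torch

@spaces.GPU(duration=30)  # GPU is allocated only for this call; duration caps it in seconds
def run_inference(x):
    # Inside the decorated call, CUDA is available on ZeroGPU hardware.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return (torch.as_tensor(x, dtype=torch.float32, device=device) * 2).cpu().tolist()
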
requirements.txt CHANGED
@@ -11,6 +11,4 @@ tensorflow
 pygame
 twilio
 ultralytics==8.3.93
-# torch==2.6.0 # Replace with ZeroGPU compatible version, e.g., 2.4.0
-torch==2.4.0 # Example compatible version
-spaces # Add spaces for ZeroGPU
+torch==2.6.0
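
For context, the diff only shows the `WebRTC(...)` constructor calls; the per-frame wiring sits in unchanged lines outside the hunks. A minimal sketch of how a handler such as `process_gaze_frame` or `process_distraction_frame` is typically attached, assuming gradio_webrtc's `.stream()` API (the passthrough handler and `time_limit` value are illustrative, not from this commit):

import gradio as gr
from gradio_webrtc import WebRTC

# Fallback STUN-only configuration, matching the diff's else branch.
rtc_configuration = {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}

def passthrough(frame):
    # Stand-in for process_gaze_frame / process_distraction_frame.
    return frame

with gr.Blocks() as demo:
    webcam_stream = WebRTC(label="Webcam Stream", rtc_configuration=rtc_configuration)
    # Each received frame goes to the handler; the return value streams back to the same component.
    webcam_stream.stream(fn=passthrough, inputs=[webcam_stream], outputs=[webcam_stream], time_limit=30)

if __name__ == "__main__":
    demo.launch()
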