Spaces:

AjaykumarPilla
/

DRSyolov8

Sleeping

App Files Files Community

AjaykumarPilla committed on about 1 month ago

Commit

61746ab

verified ·

1 Parent(s): 239672f

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -55

app.py CHANGED Viewed

@@ -4,36 +4,37 @@ import torch
 from ultralytics import YOLO
 import gradio as gr
 from scipy.interpolate import interp1d
-from scipy.signal import savgol_filter
 import plotly.graph_objects as go
 import uuid
 import os
-# Load the trained YOLOv8n model
 model = YOLO("best.pt")
 model.to('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
-# Constants
-STUMPS_WIDTH = 0.2286
-BALL_DIAMETER = 0.073
-FRAME_RATE = 20
-SLOW_MOTION_FACTOR = 1.5
-CONF_THRESHOLD = 0.15
-IMPACT_ZONE_Y = 0.9
-PITCH_LENGTH = 20.12
-STUMPS_HEIGHT = 0.71
-CAMERA_HEIGHT = 2.0
-CAMERA_DISTANCE = 10.0
-MAX_POSITION_JUMP = 250
 def process_video(video_path):
     if not os.path.exists(video_path):
         return [], [], [], "Error: Video file not found"
     cap = cv2.VideoCapture(video_path)
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    global FRAME_RATE
-    FRAME_RATE = cap.get(cv2.CAP_PROP_FPS) or 20
     stride = 32
     img_width = ((frame_width + stride - 1) // stride) * stride
     img_height = ((frame_height + stride - 1) // stride) * stride
@@ -49,17 +50,18 @@ def process_video(video_path):
             break
         frame_count += 1
         frames.append(frame.copy())
         frame = cv2.convertScaleAbs(frame, alpha=1.5, beta=20)
         kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
         frame = cv2.filter2D(frame, -1, kernel)
         results = model.predict(frame, conf=CONF_THRESHOLD, imgsz=(img_height, img_width), iou=0.5, max_det=5)
         detections = sum(1 for detection in results[0].boxes if detection.cls == 0)
-        if detections >= 1:
             max_conf = 0
             best_detection = None
             conf_scores = []
             for detection in results[0].boxes:
-                if detection.cls == 0:
                     conf = detection.conf.cpu().numpy()[0]
                     conf_scores.append(conf)
                     if conf > max_conf:
@@ -67,6 +69,7 @@ def process_video(video_path):
                         best_detection = detection
             if best_detection:
                 x1, y1, x2, y2 = best_detection.xyxy[0].cpu().numpy()
                 x1 = x1 * frame_width / img_width
                 x2 = x2 * frame_width / img_width
                 y1 = y1 * frame_height / img_height
@@ -78,6 +81,7 @@ def process_video(video_path):
         else:
             debug_log.append(f"Frame {frame_count}: {detections} ball detections")
         frames[-1] = frame
         cv2.imwrite(f"debug_frame_{frame_count}.jpg", frame)
     cap.release()
@@ -91,35 +95,33 @@ def process_video(video_path):
     return frames, ball_positions, detection_frames, "\n".join(debug_log)
 def pixel_to_3d(x, y, frame_height, frame_width):
     x_norm = x / frame_width
     y_norm = y / frame_height
-    x_3d = (x_norm - 0.5) * 3.0
     y_3d = y_norm * PITCH_LENGTH
-    z_3d = (1 - y_norm) * BALL_DIAMETER * 5
     return x_3d, y_3d, z_3d
 def estimate_trajectory(ball_positions, frames, detection_frames):
     if len(ball_positions) < 2:
         return None, None, None, None, None, None, None, None, None, "Error: Fewer than 2 frames with one ball detection"
     frame_height, frame_width = frames[0].shape[:2]
     debug_log = []
     filtered_positions = [ball_positions[0]]
     filtered_frames = [detection_frames[0]]
     for i in range(1, len(ball_positions)):
         prev_pos = filtered_positions[-1]
         curr_pos = ball_positions[i]
-        distance = np.linalg.norm(np.array(curr_pos) - np.array(prev_pos))
-        frame_gap = detection_frames[i] - filtered_frames[-1]
-        velocity = distance / frame_gap if frame_gap > 0 else 0
-        if distance <= MAX_POSITION_JUMP and velocity < 100:
             filtered_positions.append(curr_pos)
             filtered_frames.append(detection_frames[i])
         else:
-            debug_log.append(f"Filtered out frame {detection_frames[i]} due to sudden jump: distance={distance:.1f}, velocity={velocity:.1f}")
     if len(filtered_positions) < 2:
         return None, None, None, None, None, None, None, None, None, "Error: Fewer than 2 valid ball detections after filtering"
@@ -128,60 +130,64 @@ def estimate_trajectory(ball_positions, frames, detection_frames):
     y_coords = [pos[1] for pos in filtered_positions]
     times = np.array(filtered_frames) / FRAME_RATE
-    try:
-        x_coords = savgol_filter(x_coords, window_length=5, polyorder=2, mode='nearest')
-        y_coords = savgol_filter(y_coords, window_length=5, polyorder=2, mode='nearest')
-    except Exception as e:
-        return None, None, None, None, None, None, None, None, None, f"Smoothing error: {str(e)}"
     detections_3d = [pixel_to_3d(x, y, frame_height, frame_width) for x, y in zip(x_coords, y_coords)]
     pitch_idx = min(range(len(filtered_positions)), key=lambda i: y_coords[i])
     pitch_point = (x_coords[pitch_idx], y_coords[pitch_idx])
     pitch_frame = filtered_frames[pitch_idx]
     post_pitch_indices = [i for i in range(len(filtered_positions)) if filtered_frames[i] > pitch_frame]
     if not post_pitch_indices:
         return None, None, None, None, None, None, None, None, None, "Error: No detections after pitch point"
     impact_idx = max(post_pitch_indices, key=lambda i: y_coords[i])
     impact_point = (x_coords[impact_idx], y_coords[impact_idx])
     impact_frame = filtered_frames[impact_idx]
     try:
         fx = interp1d(times, x_coords, kind='linear', fill_value="extrapolate")
         fy = interp1d(times, y_coords, kind='linear', fill_value="extrapolate")
     except Exception as e:
         return None, None, None, None, None, None, None, None, None, f"Error in trajectory interpolation: {str(e)}"
     total_frames = max(detection_frames) - min(detection_frames) + 1
     t_full = np.linspace(min(detection_frames) / FRAME_RATE, max(detection_frames) / FRAME_RATE, int(total_frames * SLOW_MOTION_FACTOR))
     x_full = fx(t_full)
     y_full = fy(t_full)
     trajectory_2d = list(zip(x_full, y_full))
-    trajectory_3d = [pixel_to_3d(x, y, frame_height, frame_width) for x, y in trajectory_2d]
-    pitch_point_3d = pixel_to_3d(*pitch_point, frame_height, frame_width)
-    impact_point_3d = pixel_to_3d(*impact_point, frame_height, frame_width)
-    import matplotlib.pyplot as plt
-    plt.figure(figsize=(10, 6))
-    plt.plot([p[0] for p in ball_positions], [p[1] for p in ball_positions], 'kx-', label='Original')
-    plt.plot(x_coords, y_coords, 'bo-', label='Smoothed Trajectory')
-    plt.scatter(pitch_point[0], pitch_point[1], color='red', label='Pitch Point')
-    plt.scatter(impact_point[0], impact_point[1], color='yellow', label='Impact Point')
-    plt.legend()
-    plt.title("Ball Trajectory Filtering & Smoothing")
-    plt.savefig("trajectory_smooth_debug.png")
     debug_log.extend([
         f"Trajectory estimated successfully",
-        f"Pitch point at frame {pitch_frame + 1}: ({pitch_point[0]:.1f}, {pitch_point[1]:.1f})",
-        f"Impact point at frame {impact_frame + 1}: ({impact_point[0]:.1f}, {impact_point[1]:.1f})"
     ])
     return trajectory_2d, pitch_point, impact_point, pitch_frame, impact_frame, detections_3d, trajectory_3d, pitch_point_3d, impact_point_3d, "\n".join(debug_log)
 def create_3d_plot(detections_3d, trajectory_3d, pitch_point_3d, impact_point_3d, plot_type="detections"):
     stump_x = [-STUMPS_WIDTH/2, STUMPS_WIDTH/2, 0]
     stump_y = [PITCH_LENGTH, PITCH_LENGTH, PITCH_LENGTH]
     stump_z = [0, 0, 0]
@@ -288,18 +294,18 @@ def generate_slow_motion(frames, trajectory, pitch_point, impact_point, detectio
         trajectory_indices = []
     for i, frame in enumerate(frames):
-        frame_idx = i - min(detection_frames) if trajectory_indices else -1
-        if 0 <= frame_idx < total_frames and trajectory_points.size > 0:
             end_idx = trajectory_indices[frame_idx] + 1
-            cv2.polylines(frame, [trajectory_points[:end_idx]], False, (255, 0, 0), 2)
         if pitch_point and i == pitch_frame:
             x, y = pitch_point
-            cv2.circle(frame, (int(x), int(y)), 8, (0, 0, 255), -1)
             cv2.putText(frame, "Pitch Point", (int(x) + 10, int(y) - 10),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
         if impact_point and i == impact_frame:
             x, y = impact_point
-            cv2.circle(frame, (int(x), int(y)), 8, (0, 255, 255), -1)
             cv2.putText(frame, "Impact Point", (int(x) + 10, int(y) + 20),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
         for _ in range(int(SLOW_MOTION_FACTOR)):
@@ -349,4 +355,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 from ultralytics import YOLO
 import gradio as gr
 from scipy.interpolate import interp1d
 import plotly.graph_objects as go
 import uuid
 import os
+from scipy.ndimage import uniform_filter1d
+# Load the trained YOLOv8n model with optimizations
 model = YOLO("best.pt")
 model.to('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available
+# Constants for LBW decision and video processing
+STUMPS_WIDTH = 0.2286  # meters (width of stumps)
+BALL_DIAMETER = 0.073  # meters (approx. cricket ball diameter)
+FRAME_RATE = 20  # Default frame rate, updated dynamically
+SLOW_MOTION_FACTOR = 1.5  # Faster replay (e.g., 30 / 1.5 = 20 FPS)
+CONF_THRESHOLD = 0.15  # Lowered for better detection
+IMPACT_ZONE_Y = 0.9  # Adjusted to 90% of frame height for impact zone
+PITCH_LENGTH = 20.12  # meters (standard cricket pitch length)
+STUMPS_HEIGHT = 0.71  # meters (stumps height)
+CAMERA_HEIGHT = 2.0  # meters (assumed camera height)
+CAMERA_DISTANCE = 10.0  # meters (assumed camera distance from pitch)
+MAX_POSITION_JUMP = 250  # Increased to include more detections
 def process_video(video_path):
     if not os.path.exists(video_path):
         return [], [], [], "Error: Video file not found"
     cap = cv2.VideoCapture(video_path)
+    # Get native video resolution and frame rate
     frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    FRAME_RATE = cap.get(cv2.CAP_PROP_FPS) or 20  # Use actual frame rate or default
+    # Adjust image size to be multiple of 32 for YOLO
     stride = 32
     img_width = ((frame_width + stride - 1) // stride) * stride
     img_height = ((frame_height + stride - 1) // stride) * stride
             break
         frame_count += 1
         frames.append(frame.copy())
+        # Enhance frame contrast and sharpness
         frame = cv2.convertScaleAbs(frame, alpha=1.5, beta=20)
         kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
         frame = cv2.filter2D(frame, -1, kernel)
         results = model.predict(frame, conf=CONF_THRESHOLD, imgsz=(img_height, img_width), iou=0.5, max_det=5)
         detections = sum(1 for detection in results[0].boxes if detection.cls == 0)
+        if detections >= 1:  # Process frames with at least one ball detection
             max_conf = 0
             best_detection = None
             conf_scores = []
             for detection in results[0].boxes:
+                if detection.cls == 0:  # Class 0 is the ball
                     conf = detection.conf.cpu().numpy()[0]
                     conf_scores.append(conf)
                     if conf > max_conf:
                         best_detection = detection
             if best_detection:
                 x1, y1, x2, y2 = best_detection.xyxy[0].cpu().numpy()
+                # Scale coordinates back to original frame size
                 x1 = x1 * frame_width / img_width
                 x2 = x2 * frame_width / img_width
                 y1 = y1 * frame_height / img_height
         else:
             debug_log.append(f"Frame {frame_count}: {detections} ball detections")
         frames[-1] = frame
+        # Save debug frame
         cv2.imwrite(f"debug_frame_{frame_count}.jpg", frame)
     cap.release()
     return frames, ball_positions, detection_frames, "\n".join(debug_log)
 def pixel_to_3d(x, y, frame_height, frame_width):
+    """Convert 2D pixel coordinates to 3D real-world coordinates."""
     x_norm = x / frame_width
     y_norm = y / frame_height
+    x_3d = (x_norm - 0.5) * 3.0  # Center x at 0 (middle of pitch)
     y_3d = y_norm * PITCH_LENGTH
+    z_3d = (1 - y_norm) * BALL_DIAMETER * 5  # Scale to approximate ball bounce height
     return x_3d, y_3d, z_3d
 def estimate_trajectory(ball_positions, frames, detection_frames):
     if len(ball_positions) < 2:
         return None, None, None, None, None, None, None, None, None, "Error: Fewer than 2 frames with one ball detection"
     frame_height, frame_width = frames[0].shape[:2]
     debug_log = []
+    # Filter out sudden changes in position for continuous trajectory
     filtered_positions = [ball_positions[0]]
     filtered_frames = [detection_frames[0]]
     for i in range(1, len(ball_positions)):
         prev_pos = filtered_positions[-1]
         curr_pos = ball_positions[i]
+        distance = np.sqrt((curr_pos[0] - prev_pos[0])**2 + (curr_pos[1] - prev_pos[1])**2)
+        if distance <= MAX_POSITION_JUMP:
             filtered_positions.append(curr_pos)
             filtered_frames.append(detection_frames[i])
         else:
+            debug_log.append(f"Filtered out detection at frame {detection_frames[i] + 1}: large jump ({distance:.1f} pixels)")
+            continue
     if len(filtered_positions) < 2:
         return None, None, None, None, None, None, None, None, None, "Error: Fewer than 2 valid ball detections after filtering"
     y_coords = [pos[1] for pos in filtered_positions]
     times = np.array(filtered_frames) / FRAME_RATE
+    # Smooth coordinates to avoid sudden jumps
+    x_coords = uniform_filter1d(x_coords, size=3)
+    y_coords = uniform_filter1d(y_coords, size=3)
+    # Convert to 3D for visualization
     detections_3d = [pixel_to_3d(x, y, frame_height, frame_width) for x, y in zip(x_coords, y_coords)]
+    # Pitch point: Detection with lowest y-coordinate (near bowler's end)
     pitch_idx = min(range(len(filtered_positions)), key=lambda i: y_coords[i])
     pitch_point = (x_coords[pitch_idx], y_coords[pitch_idx])
     pitch_frame = filtered_frames[pitch_idx]
+    # Impact point: Detection with highest y-coordinate after pitch point (near stumps)
     post_pitch_indices = [i for i in range(len(filtered_positions)) if filtered_frames[i] > pitch_frame]
     if not post_pitch_indices:
         return None, None, None, None, None, None, None, None, None, "Error: No detections after pitch point"
     impact_idx = max(post_pitch_indices, key=lambda i: y_coords[i])
     impact_point = (x_coords[impact_idx], y_coords[impact_idx])
     impact_frame = filtered_frames[impact_idx]
     try:
+        # Use linear interpolation for stable trajectory
         fx = interp1d(times, x_coords, kind='linear', fill_value="extrapolate")
         fy = interp1d(times, y_coords, kind='linear', fill_value="extrapolate")
     except Exception as e:
         return None, None, None, None, None, None, None, None, None, f"Error in trajectory interpolation: {str(e)}"
+    # Generate dense points for all frames between first and last detection
     total_frames = max(detection_frames) - min(detection_frames) + 1
     t_full = np.linspace(min(detection_frames) / FRAME_RATE, max(detection_frames) / FRAME_RATE, int(total_frames * SLOW_MOTION_FACTOR))
     x_full = fx(t_full)
     y_full = fy(t_full)
     trajectory_2d = list(zip(x_full, y_full))
+    trajectory_3d = [pixel_to_3d(x, y, frame_height, frame_width) for x, y in trajectory_2d]
+    pitch_point_3d = pixel_to_3d(pitch_point[0], pitch_point[1], frame_height, frame_width)
+    impact_point_3d = pixel_to_3d(impact_point[0], impact_point[1], frame_height, frame_width)
+    # Debug trajectory and points
     debug_log.extend([
         f"Trajectory estimated successfully",
+        f"Pitch point at frame {pitch_frame + 1}: ({pitch_point[0]:.1f}, {pitch_point[1]:.1f}), 3D: {pitch_point_3d}",
+        f"Impact point at frame {impact_frame + 1}: ({impact_point[0]:.1f}, {impact_point[1]:.1f}), 3D: {impact_point_3d}",
+        f"Detections in frames: {filtered_frames}",
+        f"Total filtered detections: {len(filtered_frames)}"
     ])
+    # Save trajectory plot for debugging
+    import matplotlib.pyplot as plt
+    plt.plot(x_coords, y_coords, 'bo-', label='Filtered Detections')
+    plt.plot(pitch_point[0], pitch_point[1], 'ro', label='Pitch Point')
+    plt.plot(impact_point[0], impact_point[1], 'yo', label='Impact Point')
+    plt.legend()
+    plt.savefig("trajectory_debug.png")
     return trajectory_2d, pitch_point, impact_point, pitch_frame, impact_frame, detections_3d, trajectory_3d, pitch_point_3d, impact_point_3d, "\n".join(debug_log)
 def create_3d_plot(detections_3d, trajectory_3d, pitch_point_3d, impact_point_3d, plot_type="detections"):
+    """Create 3D Plotly visualization for detections or trajectory using single-detection frames."""
     stump_x = [-STUMPS_WIDTH/2, STUMPS_WIDTH/2, 0]
     stump_y = [PITCH_LENGTH, PITCH_LENGTH, PITCH_LENGTH]
     stump_z = [0, 0, 0]
         trajectory_indices = []
     for i, frame in enumerate(frames):
+        frame_idx = i - min_frame if trajectory_indices else -1
+        if frame_idx >= 0 and frame_idx < total_frames and trajectory_points.size > 0:
             end_idx = trajectory_indices[frame_idx] + 1
+            cv2.polylines(frame, [trajectory_points[:end_idx]], False, (255, 0, 0), 2)  # Blue line in BGR
         if pitch_point and i == pitch_frame:
             x, y = pitch_point
+            cv2.circle(frame, (int(x), int(y)), 8, (0, 0, 255), -1)  # Red circle
             cv2.putText(frame, "Pitch Point", (int(x) + 10, int(y) - 10),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
         if impact_point and i == impact_frame:
             x, y = impact_point
+            cv2.circle(frame, (int(x), int(y)), 8, (0, 255, 255), -1)  # Yellow circle
             cv2.putText(frame, "Impact Point", (int(x) + 10, int(y) + 20),
                         cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
         for _ in range(int(SLOW_MOTION_FACTOR)):
 )
 if __name__ == "__main__":
+    iface.launch()