luminoussg committed
Commit f7fcdf0 · verified · 1 Parent(s): 6f0d6b6

Update app.py

Files changed (1): app.py +156 -141
app.py CHANGED
@@ -1,16 +1,19 @@
 import gradio as gr
 import cv2
 import os
-import subprocess
 import pandas as pd
 import numpy as np
 import torch
 from ultralytics import YOLO
+from ultralytics.solutions import object_counter
+import subprocess
+import spaces  # Import spaces for ZeroGPU integration
 
-with gr.Blocks(theme=gr.themes.Dark()) as demo:
-
-# Loading a YOLO model
-model = YOLO('yolov8x.pt')
+# Initialize the YOLO model
+MODEL = "yolov8n.pt"
+model = YOLO(MODEL)
+model.fuse()
+
 dict_classes = model.model.names
 
 # Auxiliary functions
@@ -21,153 +24,165 @@ def resize_frame(frame, scale_percent):
     resized = cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)
     return resized
 
-# Processing function
-def process_video(video_path, line_position):
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    print(f"Using device: {device}")
-    model.to(device)
-
-    # Read video
-    video = cv2.VideoCapture(video_path)
-
-    # Scaling percentage of original frame
-    scale_percent = 50
-    class_IDS = [2, 3, 5, 7]
-    cy_linha = int(line_position * scale_percent / 100)
-    cx_sentido = int(2000 * scale_percent / 100)
-    offset = int(8 * scale_percent / 100)
-
-    # Initializing counters
-    contador_in = 0
-    contador_out = 0
-    veiculos_contador_in = dict.fromkeys(class_IDS, 0)
-    veiculos_contador_out = dict.fromkeys(class_IDS, 0)
-
-    # Getting video properties
-    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
-    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
-    fps = video.get(cv2.CAP_PROP_FPS)
-
-    if scale_percent != 100:
-        width = int(width * scale_percent / 100)
-        height = int(height * scale_percent / 100)
-
-    # Setting up video writer
-    tmp_output_path = "tmp_output.mp4"
-    output_video = cv2.VideoWriter(tmp_output_path,
-                                   cv2.VideoWriter_fourcc(*'mp4v'),
-                                   fps, (width, height))
-
-    for i in range(int(video.get(cv2.CAP_PROP_FRAME_COUNT))):
-        ret, frame = video.read()
+@spaces.GPU
+def process_video(video_file, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness, draw_tracks, view_img, view_in_counts, view_out_counts, track_thickness, region_thickness, line_dist_thresh, persist, conf, iou, classes, verbose):
+    # Ensure classes is a list of integers
+    classes = [int(x) for x in classes.split(',') if x.strip().isdigit()] if classes else None
+
+    line_points = [(line_start_x, line_start_y), (line_end_x, line_end_y)]
+
+    cap = cv2.VideoCapture(video_file)
+    if not cap.isOpened():
+        raise ValueError("Failed to open video file")
+
+    tmp_output_path = "processed_output_temp.mp4"
+    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) * scale_percent / 100)
+    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) * scale_percent / 100)
+    fps = int(cap.get(cv2.CAP_PROP_FPS))
+    video_writer = cv2.VideoWriter(tmp_output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
+
+    counter = object_counter.ObjectCounter(
+        classes_names=model.names,
+        view_img=view_img,
+        reg_pts=line_points,
+        draw_tracks=draw_tracks,
+        line_thickness=int(line_thickness),
+        track_thickness=int(track_thickness),
+        region_thickness=int(region_thickness),
+        line_dist_thresh=line_dist_thresh,
+        view_in_counts=view_in_counts,
+        view_out_counts=view_out_counts,
+        count_reg_color=(255, 0, 255),    # Magenta
+        track_color=(0, 255, 0),          # Green
+        count_txt_color=(255, 255, 255),  # White
+        count_bg_color=(50, 50, 50)       # Dark gray
+    )
+
+    prev_frame = None
+    prev_keypoints = None
+
+    while cap.isOpened():
+        ret, frame = cap.read()
         if not ret:
             break
        frame = resize_frame(frame, scale_percent)
 
-        y_hat = model.predict(frame, conf=0.7, classes=class_IDS, device=device, verbose=False)
-
-        boxes = y_hat[0].boxes.xyxy.cpu().numpy()
-        conf = y_hat[0].boxes.conf.cpu().numpy()
-        classes = y_hat[0].boxes.cls.cpu().numpy()
-
-        positions_frame = pd.DataFrame(boxes, columns=['xmin', 'ymin', 'xmax', 'ymax'])
-        positions_frame['conf'] = conf
-        positions_frame['class'] = classes
-
-        labels = [dict_classes[i] for i in classes]
-
-        cv2.line(frame, (0, cy_linha), (int(4500 * scale_percent / 100), cy_linha), (255, 255, 0), 8)
-
-        for ix, row in positions_frame.iterrows():
-            xmin, ymin, xmax, ymax, confidence, category = row.astype('int')
-            center_x, center_y = int((xmax + xmin) / 2), int((ymax + ymin) / 2)
-
-            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 0, 0), 5)
-            cv2.circle(frame, (center_x, center_y), 5, (255, 0, 0), -1)
-            cv2.putText(img=frame, text=labels[ix] + ' - ' + str(np.round(conf[ix], 2)),
-                        org=(xmin, ymin-10), fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=(255, 0, 0), thickness=2)
-
-            # Adjust counting logic based on new line position
-            if (center_y < (cy_linha + offset)) and (center_y > (cy_linha - offset)):
-                if (center_x >= 0) and (center_x <= cx_sentido):
-                    contador_in += 1
-                    veiculos_contador_in[category] += 1
-                else:
-                    contador_out += 1
-                    veiculos_contador_out[category] += 1
-
-        contador_in_plt = [f'{dict_classes[k]}: {i}' for k, i in veiculos_contador_in.items()]
-        contador_out_plt = [f'{dict_classes[k]}: {i}' for k, i in veiculos_contador_out.items()]
-
-        cv2.putText(img=frame, text='N. vehicles In',
-                    org=(30, 30), fontFace=cv2.FONT_HERSHEY_TRIPLEX,
-                    fontScale=1, color=(255, 255, 0), thickness=1)
-
-        cv2.putText(img=frame, text='N. vehicles Out',
-                    org=(int(2800 * scale_percent / 100), 30),
-                    fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=(255, 255, 0), thickness=1)
-
-        xt = 40
-        for txt in range(len(contador_in_plt)):
-            xt += 30
-            cv2.putText(img=frame, text=contador_in_plt[txt],
-                        org=(30, xt), fontFace=cv2.FONT_HERSHEY_TRIPLEX,
-                        fontScale=1, color=(255, 255, 0), thickness=1)
-
-            cv2.putText(img=frame, text=contador_out_plt[txt],
-                        org=(int(2800 * scale_percent / 100), xt), fontFace=cv2.FONT_HERSHEY_TRIPLEX,
-                        fontScale=1, color=(255, 255, 0), thickness=1)
-
-        cv2.putText(img=frame, text=f'In:{contador_in}',
-                    org=(int(1820 * scale_percent / 100), cy_linha + 60),
-                    fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=(255, 255, 0), thickness=2)
-
-        cv2.putText(img=frame, text=f'Out:{contador_out}',
-                    org=(int(1800 * scale_percent / 100), cy_linha - 40),
-                    fontFace=cv2.FONT_HERSHEY_TRIPLEX, fontScale=1, color=(255, 255, 0), thickness=2)
-
-        output_video.write(frame)
-
-    output_video.release()
-
-    # Post-processing
-    output_path = "output.mp4"
-    if os.path.exists(output_path):
-        os.remove(output_path)
-
+        # Adjust line points based on scaling
+        scaled_line_points = [(int(x * scale_percent / 100), int(y * scale_percent / 100)) for x, y in line_points]
+        for point1, point2 in zip(scaled_line_points[:-1], scaled_line_points[1:]):
+            cv2.line(frame, tuple(map(int, point1)), tuple(map(int, point2)), (255, 255, 0), int(line_thickness))
+
+        tracks = model.track(frame, persist=persist, conf=conf, iou=iou, classes=classes, verbose=verbose)
+
+        # Update the counter with the current frame and tracks
+        frame = counter.start_counting(frame, tracks)
+
+        # Check if the previous frame is initialized for optical flow calculation
+        if prev_frame is not None:
+            try:
+                prev_frame_resized = resize_frame(prev_frame, scale_percent)
+                matched_keypoints, status, _ = cv2.calcOpticalFlowPyrLK(prev_frame_resized, frame, prev_keypoints, None)
+                prev_keypoints = matched_keypoints
+            except cv2.error as e:
+                print(f"Error in optical flow calculation: {e}")
+
+        prev_frame = frame.copy()
+        prev_keypoints = cv2.goodFeaturesToTrack(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)
+
+        video_writer.write(frame)
+
+    cap.release()
+    video_writer.release()
+
+    # Reduce the resolution of the video for download
+    output_path = "processed_output.mp4"
+    if h > 1080:
+        resolution = "1920x1080"
+    else:
+        resolution = "1280x720"
+
     subprocess.run(
-        ["ffmpeg", "-i", tmp_output_path, "-crf", "18", "-preset", "veryfast", "-hide_banner", "-loglevel", "error", "-vcodec", "libx264", output_path])
+        ["ffmpeg", "-y", "-i", tmp_output_path, "-vf", f"scale={resolution}", "-crf", "18", "-preset", "veryfast", "-hide_banner", "-loglevel", "error", output_path]
+    )
     os.remove(tmp_output_path)
-
+
     return output_path
 
-# Gradio interface
-with gr.Blocks() as demo:
-    video_input = gr.File(label="Upload your video")
-    line_position = gr.Slider(0, 3000, value=1500, label="Line Position (px)")
-    preview_button = gr.Button("Preview Line")
-    process_button = gr.Button("Process Video")
-    video_output = gr.Video(label="Processed Video")
-    download_button = gr.File(label="Download Processed Video")
-
-    def preview_line(video, line_position):
-        video = cv2.VideoCapture(video.name)
-        ret, frame = video.read()
-        if ret:
-            scale_percent = 50
-            cy_linha = int(line_position * scale_percent / 100)
-            frame = resize_frame(frame, scale_percent)
-            cv2.line(frame, (0, cy_linha), (int(4500 * scale_percent / 100), cy_linha), (255, 255, 0), 8)
-            output_path = "preview_line.jpg"
-            cv2.imwrite(output_path, frame)
-            return output_path
-        return None
-
-    def process_video_and_display(video, line_position):
-        output_path = process_video(video.name, line_position)
-        return output_path, output_path
-
-    preview_button.click(preview_line, inputs=[video_input, line_position], outputs=gr.Image(label="Preview Line"))
-    process_button.click(process_video_and_display, inputs=[video_input, line_position], outputs=[video_output, download_button])
-
-demo.launch()
+def preview_line(video_file, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness):
+    cap = cv2.VideoCapture(video_file)
+    ret, frame = cap.read()
+    if not ret:
+        raise ValueError("Failed to read video frame")
+
+    frame = resize_frame(frame, scale_percent)
+    line_points = [(line_start_x, line_start_y), (line_end_x, line_end_y)]
+    scaled_line_points = [(int(x * scale_percent / 100), int(y * scale_percent / 100)) for x, y in line_points]
+    for point1, point2 in zip(scaled_line_points[:-1], scaled_line_points[1:]):
+        cv2.line(frame, tuple(map(int, point1)), tuple(map(int, point2)), (255, 255, 0), int(line_thickness))
+
+    preview_path = "preview_line.jpg"
+    cv2.imwrite(preview_path, frame)
+    return preview_path
+
+def gradio_app(video, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness, draw_tracks, view_img, view_in_counts, view_out_counts, track_thickness, region_thickness, line_dist_thresh, persist, conf, iou, classes_to_track, verbose):
+    output_path = process_video(video.name, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, int(line_thickness), draw_tracks, view_img, view_in_counts, view_out_counts, int(track_thickness), int(region_thickness), line_dist_thresh, persist, conf, iou, classes_to_track, verbose)
+    return output_path, output_path
+
+def update_preview(video, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness):
+    return preview_line(video.name, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, int(line_thickness))
+
+def set_4k_coordinates():
+    return 0, 1500, 3840, 1500
+
+def set_1080p_coordinates():
+    return 0, 700, 1920, 700
+
+with gr.Blocks(theme="dark") as demo:
+    with gr.Row():
+        with gr.Column(scale=1):
+            video_input = gr.File(label="Upload your video")
+            with gr.Row():
+                set_4k_button = gr.Button("4K")
+                set_1080p_button = gr.Button("1080p")
+            line_start_x = gr.Number(label="Line Start X", value=500, precision=0)
+            line_start_y = gr.Number(label="Line Start Y", value=1500, precision=0)
+            line_end_x = gr.Number(label="Line End X", value=3400, precision=0)
+            line_end_y = gr.Number(label="Line End Y", value=1500, precision=0)
+            line_thickness = gr.Slider(minimum=1, maximum=10, value=2, label="Line Thickness")
+            draw_tracks = gr.Checkbox(label="Draw Tracks", value=True)
+            view_img = gr.Checkbox(label="Display Image with Annotations", value=True)
+            view_in_counts = gr.Checkbox(label="Display In-Counts", value=True)
+            view_out_counts = gr.Checkbox(label="Display Out-Counts", value=True)
+            track_thickness = gr.Slider(minimum=1, maximum=10, value=2, label="Track Thickness")
+            region_thickness = gr.Slider(minimum=1, maximum=10, value=5, label="Region Thickness")
+            line_dist_thresh = gr.Slider(minimum=5, maximum=50, value=15, label="Line Distance Threshold")
+            persist = gr.Checkbox(label="Persist Tracks", value=True)
+            conf = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.05, label="Confidence Threshold")
+            iou = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.05, label="IOU Threshold")
+            classes_to_track = gr.Textbox(label="Classes to Track (comma-separated ids)", value="2,3,5,7")
+            verbose = gr.Checkbox(label="Verbose Tracking", value=True)
+            scale_percent = gr.Slider(minimum=10, maximum=100, value=100, step=10, label="Scale Percentage")
+            process_button = gr.Button("Process Video")
+        with gr.Column(scale=2):
+            preview_image = gr.Image(label="Preview Line")
+            video_output = gr.Video(label="Processed Video")
+            download_button = gr.File(label="Download Processed Video")
+
+    def update_preview_and_display(video, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness):
+        preview_path = update_preview(video, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness)
+        return preview_path
+
+    video_input.change(update_preview_and_display, inputs=[video_input, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness], outputs=preview_image)
+    for component in [scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness, draw_tracks, view_img, view_in_counts, view_out_counts, track_thickness, region_thickness, line_dist_thresh, persist, conf, iou, classes_to_track, verbose]:
+        component.change(update_preview_and_display, inputs=[video_input, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness], outputs=preview_image)
+
+    set_4k_button.click(lambda: set_4k_coordinates(), outputs=[line_start_x, line_start_y, line_end_x, line_end_y])
+    set_1080p_button.click(lambda: set_1080p_coordinates(), outputs=[line_start_x, line_start_y, line_end_x, line_end_y])
+
+    def clear_previous_video():
+        return None, None
+
+    process_button.click(clear_previous_video, outputs=[video_output, download_button], queue=False)
+    process_button.click(gradio_app, inputs=[video_input, scale_percent, line_start_x, line_start_y, line_end_x, line_end_y, line_thickness, draw_tracks, view_img, view_in_counts, view_out_counts, track_thickness, region_thickness, line_dist_thresh, persist, conf, iou, classes_to_track, verbose], outputs=[video_output, download_button])
+
+demo.launch()
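
For local testing, the two new entry points can be exercised without the Gradio UI. The sketch below is illustrative only: sample.mp4 and the scratch module name counting_demo are hypothetical; it assumes an ultralytics release that still ships ultralytics.solutions.object_counter with the start_counting(frame, tracks) API used in this commit, and that the spaces package is installed (its @spaces.GPU decorator is a pass-through outside a ZeroGPU Space). Because app.py calls demo.launch() at import time, the functions are assumed to have been copied into counting_demo rather than imported from app directly.

# Hypothetical smoke test; assumes preview_line and process_video were
# copied into a scratch module named counting_demo.
from counting_demo import preview_line, process_video

# Draw the counting line on the first frame of a local test clip.
preview = preview_line("sample.mp4", 100,   # video path, scale_percent
                       0, 700, 1920, 700,   # line start/end (x, y)
                       2)                   # line_thickness
print("preview written to", preview)

# Track and count vehicles (COCO class ids 2, 3, 5, 7) across the line.
output = process_video(
    "sample.mp4", 100,   # video path, scale_percent
    0, 700, 1920, 700,   # line start/end coordinates
    2,                   # line_thickness
    True, False,         # draw_tracks, view_img (False for headless runs)
    True, True,          # view_in_counts, view_out_counts
    2, 5, 15,            # track_thickness, region_thickness, line_dist_thresh
    True, 0.1, 0.7,      # persist, conf, iou
    "2,3,5,7", False,    # classes, verbose
)
print("processed video written to", output)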