import torch
import cv2
import numpy as np
import gradio as gr
from ultralytics import YOLO

# Check if CUDA (GPU support) is available
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the YOLOv8 segmentation model.
# Ultralytics automatically runs inference on the GPU when one is available.
model = YOLO('yolov8x-seg.pt')


def process_video(input_video_path):
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print("Error: Couldn't open the video file.")
        return None

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter("output_video.mp4", fourcc, fps, (frame_width, frame_height))

    # Minimum confidence score for a detection to be drawn
    threshold = 0.2

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_copy = frame.copy()

        # Pass the BGR frame directly to the model: Ultralytics handles resizing,
        # normalization and channel conversion internally, and returns boxes in the
        # original frame's coordinates (a manual resize to 640x640 would produce
        # coordinates that no longer match the full-resolution frame).
        results = model(frame)[0]

        for result in results.boxes.data.tolist():
            x1, y1, x2, y2, score, class_id = result
            if score > threshold:
                cv2.rectangle(frame_copy, (int(x1), int(y1)), (int(x2), int(y2)),
                              (0, 255, 0), 4)
                cv2.putText(frame_copy, results.names[int(class_id)].upper(),
                            (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX,
                            1.3, (0, 255, 0), 3, cv2.LINE_AA)
                cv2.putText(frame_copy, f"{score:.2f}",
                            (int(x1), int(y2 + 10)), cv2.FONT_HERSHEY_SIMPLEX,
                            1.3, (0, 0, 255), 3, cv2.LINE_AA)

        out.write(frame_copy)

    cap.release()
    out.release()
    cv2.destroyAllWindows()

    return "output_video.mp4"


# Define the input and output interfaces for Gradio
inputs_video = gr.Video(label="Input Video")
outputs_video = gr.Video(label="Output Video")

# Create the Gradio interface
demo = gr.Interface(
    fn=process_video,
    inputs=inputs_video,
    outputs=outputs_video,
    title="Animal detector using YOLOv8 for Videos",
)

# Launch the interface
demo.launch()