Spaces:

Segizu
/

Computer_Vision

Sleeping

App Files Files Community

Computer_Vision / app.py

Segizu

yolov8

8ac08dc 4 months ago

raw

history blame

3 kB

	import cv2
	import gradio as gr
	from ultralytics import YOLO
	from PIL import Image
	import tempfile

	# Load the YOLOv8 model once at module level so process_video can reuse it
	# (other checkpoints such as yolov8n.pt, yolov8s.pt, etc. can be used).
	model = YOLO("yolov8n.pt")

	def process_video(video_path):
	    """Annotate people, bicycles and motorcycles in a video with YOLOv8.

	    Each frame of the input is run through the module-level ``model`` with a
	    confidence threshold of 0.5. Detections whose class is in
	    ``_TARGET_CLASSES`` are drawn on the frame (green box plus a
	    "<class> <confidence>" label) and the frames are written to a new
	    temporary ``.mp4`` file.

	    Args:
	        video_path: Path of the video file to process.

	    Returns:
	        Path of the annotated ``.mp4`` file, or ``None`` when the input video
	        cannot be opened or no output writer could be created.
	    """
	    import os  # local import: only needed to delete the temp file on failure

	    cap = cv2.VideoCapture(video_path)
	    out = None
	    try:
	        if not cap.isOpened():
	            return None

	        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
	        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
	        fps = cap.get(cv2.CAP_PROP_FPS)
	        # Some containers report 0 or NaN; "not fps > 0" catches both and a
	        # writer created with such a rate would produce an unusable file.
	        if not fps > 0:
	            fps = 30.0

	        # Reserve a temporary path for the result; the handle is closed right
	        # away so that OpenCV can open the file itself.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
	            output_path = tmp_file.name

	        out = _open_video_writer(output_path, fps, (width, height))
	        if out is None:
	            os.remove(output_path)
	            return None

	        while True:
	            ret, frame = cap.read()
	            if not ret:
	                break

	            # Pass the OpenCV frame as-is: Ultralytics expects NumPy input in
	            # BGR channel order, so converting to RGB first would swap the
	            # channels the model sees.
	            results = model.predict(frame, conf=0.5)
	            # predict() returns a list; a single frame yields a single result.
	            _draw_detections(frame, results[0].boxes)

	            out.write(frame)
	    finally:
	        # Release the capture and writer even if inference or drawing raised.
	        cap.release()
	        if out is not None:
	            out.release()

	    return output_path


	# Class names (as reported by model.names) that get drawn on the output.
	_TARGET_CLASSES = frozenset({"person", "bicycle", "motorcycle"})


	def _open_video_writer(output_path, fps, frame_size):
	    """Return an opened ``cv2.VideoWriter`` for ``output_path``, or ``None``.

	    H.264 ('avc1') is tried first because browsers can play it; 'mp4v' is the
	    fallback for OpenCV builds that ship without an H.264 encoder.
	    """
	    for codec in ("avc1", "mp4v"):
	        fourcc = cv2.VideoWriter_fourcc(*codec)
	        writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
	        if writer.isOpened():
	            return writer
	        writer.release()
	    return None


	def _draw_detections(frame, boxes):
	    """Draw a green box and label on ``frame`` for every target-class box."""
	    green = (0, 255, 0)
	    for box in boxes:
	        # box.cls, box.conf and box.xyxy are tensors; convert to Python values.
	        class_name = model.names[int(box.cls[0].item())]
	        if class_name not in _TARGET_CLASSES:
	            continue

	        conf = float(box.conf[0].item())
	        # Coordinates are [xmin, ymin, xmax, ymax].
	        x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])

	        cv2.rectangle(frame, (x1, y1), (x2, y2), green, 2)

	        # Keep the label inside the frame when the box touches the top edge.
	        label_y = max(y1 - 10, 15)
	        cv2.putText(frame, f"{class_name} {conf:.2f}",
	                    (x1, label_y),
	                    cv2.FONT_HERSHEY_SIMPLEX, 0.5,
	                    green, 2)

	# Gradio UI: one uploaded video in, one annotated video out.
	iface = gr.Interface(
	    title="Detección de Objetos con YOLOv8",
	    description=(
	        "Sube un video y se detectan personas, bicicletas y motos con YOLOv8. "
	        "Los objetos se enmarcan y etiquetan en el video resultante."
	    ),
	    fn=process_video,
	    inputs=gr.Video(label="Sube tu video"),
	    outputs=gr.Video(label="Video procesado"),
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()