Spaces:
Running
on
Zero
Running
on
Zero
yolov8
Browse files- app.py +50 -48
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,84 +1,86 @@
|
|
1 |
import cv2
|
2 |
import gradio as gr
|
3 |
-
from
|
4 |
from PIL import Image
|
5 |
import tempfile
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
|
10 |
def process_video(video_path):
|
11 |
"""
|
12 |
-
Procesa un video, detecta
|
13 |
-
|
14 |
-
Devuelve el video anotado.
|
15 |
"""
|
16 |
cap = cv2.VideoCapture(video_path)
|
17 |
if not cap.isOpened():
|
18 |
return None
|
19 |
|
20 |
-
|
21 |
-
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
22 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
23 |
-
fps
|
24 |
-
|
25 |
-
#
|
26 |
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
27 |
output_path = tmp_file.name
|
28 |
-
tmp_file.close()
|
29 |
|
30 |
-
#
|
31 |
-
fourcc = cv2.VideoWriter_fourcc(*'
|
32 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
33 |
-
|
34 |
-
#
|
35 |
-
|
36 |
-
threshold = 0.7 # Umbral de confianza
|
37 |
|
38 |
while True:
|
39 |
ret, frame = cap.read()
|
40 |
if not ret:
|
41 |
break
|
42 |
-
|
43 |
-
# Convertir
|
44 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
results
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
70 |
out.write(frame)
|
71 |
-
|
72 |
cap.release()
|
73 |
out.release()
|
74 |
return output_path
|
75 |
|
|
|
76 |
iface = gr.Interface(
|
77 |
fn=process_video,
|
78 |
inputs=gr.Video(label="Sube tu video"),
|
79 |
outputs=gr.Video(label="Video procesado"),
|
80 |
-
title="Detección
|
81 |
-
description="
|
|
|
82 |
)
|
83 |
|
84 |
if __name__ == "__main__":
|
|
|
1 |
import cv2
|
2 |
import gradio as gr
|
3 |
+
from ultralytics import YOLO
|
4 |
from PIL import Image
|
5 |
import tempfile
|
6 |
|
7 |
+
# Cargamos el modelo YOLOv8 (puedes usar yolov8n.pt, yolov8s.pt, etc.)
|
8 |
+
model = YOLO("yolov8n.pt")
|
9 |
|
10 |
def process_video(video_path):
|
11 |
"""
|
12 |
+
Procesa un video, detecta personas, bicicletas y motos con YOLOv8,
|
13 |
+
y dibuja los recuadros y etiquetas en cada frame. Devuelve un .mp4 anotado.
|
|
|
14 |
"""
|
15 |
cap = cv2.VideoCapture(video_path)
|
16 |
if not cap.isOpened():
|
17 |
return None
|
18 |
|
19 |
+
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
|
|
20 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
21 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
22 |
+
|
23 |
+
# Creamos un archivo temporal para guardar el resultado
|
24 |
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
|
25 |
output_path = tmp_file.name
|
26 |
+
tmp_file.close()
|
27 |
|
28 |
+
# Usamos un códec compatible con navegadores (H.264 / avc1)
|
29 |
+
fourcc = cv2.VideoWriter_fourcc(*'avc1')
|
30 |
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
|
31 |
+
|
32 |
+
# Clases que nos interesan
|
33 |
+
valid_classes = ["person", "bicycle", "motorcycle"]
|
|
|
34 |
|
35 |
while True:
|
36 |
ret, frame = cap.read()
|
37 |
if not ret:
|
38 |
break
|
39 |
+
|
40 |
+
# Convertir BGR -> RGB para predecir con YOLO
|
41 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
42 |
+
|
43 |
+
# Hacemos la inferencia con un umbral de confianza del 0.5
|
44 |
+
results = model.predict(frame_rgb, conf=0.5)
|
45 |
+
# results es una lista; tomamos la primera predicción
|
46 |
+
boxes = results[0].boxes
|
47 |
+
|
48 |
+
# Dibujamos cada bounding box
|
49 |
+
for box in boxes:
|
50 |
+
# box.cls, box.conf y box.xyxy son tensores, así que convertimos a Python float/int
|
51 |
+
cls_id = int(box.cls[0].item()) # Índice de la clase
|
52 |
+
conf = float(box.conf[0].item()) # Confianza
|
53 |
+
x1, y1, x2, y2 = box.xyxy[0] # Coordenadas [xmin, ymin, xmax, ymax]
|
54 |
+
|
55 |
+
class_name = model.names[cls_id]
|
56 |
+
if class_name in valid_classes:
|
57 |
+
# Dibujamos el rectángulo
|
58 |
+
cv2.rectangle(frame,
|
59 |
+
(int(x1), int(y1)),
|
60 |
+
(int(x2), int(y2)),
|
61 |
+
(0, 255, 0), 2)
|
62 |
+
|
63 |
+
text = f"{class_name} {conf:.2f}"
|
64 |
+
cv2.putText(frame, text,
|
65 |
+
(int(x1), int(y1) - 10),
|
66 |
+
cv2.FONT_HERSHEY_SIMPLEX, 0.5,
|
67 |
+
(0, 255, 0), 2)
|
68 |
+
|
69 |
+
# Guardamos el frame anotado en el video de salida
|
70 |
out.write(frame)
|
71 |
+
|
72 |
cap.release()
|
73 |
out.release()
|
74 |
return output_path
|
75 |
|
76 |
+
# Interfaz de Gradio
|
77 |
iface = gr.Interface(
|
78 |
fn=process_video,
|
79 |
inputs=gr.Video(label="Sube tu video"),
|
80 |
outputs=gr.Video(label="Video procesado"),
|
81 |
+
title="Detección de Objetos con YOLOv8",
|
82 |
+
description="Sube un video y se detectan personas, bicicletas y motos con YOLOv8. "
|
83 |
+
"Los objetos se enmarcan y etiquetan en el video resultante."
|
84 |
)
|
85 |
|
86 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ transformers
|
|
4 |
torch
|
5 |
tensorflow
|
6 |
torchvision
|
7 |
-
timm
|
|
|
|
|
|
4 |
torch
|
5 |
tensorflow
|
6 |
torchvision
|
7 |
+
timm
|
8 |
+
ultralytics
|
9 |
+
Pillow
|