# yolo-detector — utils.py
# Source: Hugging Face Space "sergey21000/yolo-detector" (commit ed01d72).
import os
import glob
import json
import urllib.request
from pathlib import Path
from typing import Tuple
import numpy as np
import cv2
# import yt_dlp
import gradio as gr
from ultralytics import YOLO
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Global matplotlib styling for the analytics figure shown in the Gradio UI.
plt.style.use('dark_background')
plt.rcParams.update({'figure.figsize': (12, 20)})
plt.rcParams.update({'font.size': 9})
# Mapping of YOLO class id -> human-readable class name, loaded once at import
# time. JSON object keys arrive as strings, so they are re-keyed to int to
# match the integer class labels the model emits.
YOLO_CLASS_NAMES = json.loads(Path('yolo_classes.json').read_text())
YOLO_CLASS_NAMES = {int(k): v for k, v in YOLO_CLASS_NAMES.items()}
def download_model(model_name: str, models_dir: Path, models: dict) -> str:
    """Download model weights on first use and return the local path as ``str``.

    Args:
        model_name: File name of the weights (key into ``models``).
        models_dir: Directory where weights are cached.
        models: Mapping ``model_name -> download URL``.

    Returns:
        String path to the (possibly just downloaded) weights file.

    Raises:
        KeyError: If ``model_name`` is not cached and has no URL in ``models``.
    """
    # ensure the cache directory exists — urlretrieve does not create it
    models_dir.mkdir(parents=True, exist_ok=True)
    model_path = models_dir / model_name
    if not model_path.exists():
        urllib.request.urlretrieve(models[model_name], model_path)
    return str(model_path)
def detect_image(image_path: str, model: YOLO, conf: float, iou: float) -> np.ndarray:
    """Run YOLO detection on a single image and return the annotated RGB frame.

    Args:
        image_path: Path to the input image.
        model: Loaded Ultralytics YOLO model.
        conf: Confidence threshold for detections.
        iou: IoU threshold for non-max suppression.

    Returns:
        Annotated image as an RGB ``np.ndarray``.
    """
    gr.Progress()(0.5, desc='Image detection...')
    results = model.predict(source=image_path, conf=conf, iou=iou)
    # Ultralytics plots in BGR (OpenCV convention); convert for Gradio display.
    annotated_bgr = results[0].plot()
    return cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
def detect_video(video_path_or_url: str, model: YOLO, conf: float, iou: float) -> Path:
    """Run YOLO detection over a video, saving an annotated copy and labels.

    Args:
        video_path_or_url: Path to the input video (URL support is disabled).
        model: Loaded Ultralytics YOLO model.
        conf: Confidence threshold for detections.
        iou: IoU threshold for non-max suppression.

    Returns:
        Path to the rendered ``.avi`` inside Ultralytics' save directory.
        (The previous ``Tuple[Path, Path]`` annotation was wrong — a single
        path is returned.)

    Raises:
        ValueError: If the video produced no frames.
    """
    progress = gr.Progress()
    video_path = video_path_or_url
    # if 'youtube.com' in video_path_or_url or 'youtu.be' in video_path_or_url:
    #     progress(0.001, desc='Downloading video from YouTube...')
    #     ydl_opts = {'format': 'bestvideo[height<=720]'}
    #     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    #         video_info_dict = ydl.extract_info(video_path_or_url, download=True)
    #         video_path = ydl.prepare_filename(video_info_dict)
    cap = cv2.VideoCapture(video_path)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    generator = model.predict(
        source=video_path,
        conf=conf,  # BUGFIX: was hardcoded 0.5, silently ignoring the caller's slider value
        iou=iou,    # BUGFIX: was hardcoded 0.5
        save=True,
        save_txt=True,   # per-frame label files consumed by get_csv_annotate
        save_conf=True,
        stream=True,     # lazy generator: one frame at a time, keeps memory flat
        verbose=False,
    )
    frames_count = 0
    result = None
    for result in generator:
        frames_count += 1
        progress((frames_count, num_frames), desc=f'Video detection, step {frames_count}/{num_frames}')
    if result is None:
        # previously this fell through to a NameError on `result`
        raise ValueError('Video produced no frames to detect')
    file_name = Path(result.path).with_suffix('.avi').name
    result_video_path = Path(result.save_dir) / file_name
    # remove the (possibly downloaded) source video to save disk space
    Path(video_path).unlink(missing_ok=True)
    return result_video_path
def get_csv_annotate(result_video_path: Path) -> str:
    """Aggregate per-frame YOLO ``.txt`` label files into one annotations CSV.

    Reads the label files Ultralytics writes when ``save_txt=True`` /
    ``save_conf=True`` (one file per frame, rows of ``class x y w h conf``),
    joins them with the frame timeline of the rendered video, and writes a CSV
    next to the video.

    Args:
        result_video_path: Path to the annotated video from ``detect_video``.

    Returns:
        Path (as ``str``) of the written CSV, or ``None`` when the input is
        not a ``Path`` (e.g. no video has been processed yet).
    """
    if not isinstance(result_video_path, Path):
        return None
    txts_path = result_video_path.parent / 'labels'
    # glob.escape guards against glob metacharacters in the video file name
    escaped_pattern = glob.escape(result_video_path.stem)
    matching_txts_path = sorted(txts_path.glob(f'{escaped_pattern}_*.txt'), key=os.path.getmtime)
    df_list = []
    for txt_path in matching_txts_path:
        # label files are named '<video_stem>_<frame_number>.txt'
        frame_number = int(txt_path.stem.rsplit('_', 1)[-1])
        with open(txt_path) as file:
            for line in file:
                df_list.append((frame_number, *map(float, line.split())))
    column_names = ['frame_number', 'class_label', 'x', 'y', 'w', 'h', 'conf']
    df = pd.DataFrame(df_list, columns=column_names)
    df.class_label = df.class_label.astype(int)
    df.insert(loc=1, column='class_name', value=df.class_label.map(YOLO_CLASS_NAMES))
    cap = cv2.VideoCapture(str(result_video_path))
    frames_fps = int(cap.get(cv2.CAP_PROP_FPS))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    cap.release()
    df.insert(loc=1, column='frame_sec', value=df.frame_number / frames_fps)
    # outer-merge against the full frame range so frames without detections
    # still appear in the CSV (with NaN annotation columns)
    full_frames = pd.DataFrame({'frame_number': range(total_frames)})
    df = pd.merge(full_frames, df, on='frame_number', how='outer')
    df.frame_sec = df.frame_number / frames_fps
    # 1 where the row carries a detection, 0 on padding rows. BUGFIX: this
    # column was never written, but get_matplotlib_fig groups on it, which
    # crashed the plots with KeyError: 'box_detected'.
    df['box_detected'] = df['conf'].notna().astype(int)
    result_csv_path = f'{result_video_path.parent / result_video_path.stem}_annotations.csv'
    df.to_csv(result_csv_path, index=False)
    return result_csv_path
def get_matplotlib_fig(csv_annotations_path: str):
    """Render a 7-panel matplotlib figure of detection statistics from the CSV.

    Args:
        csv_annotations_path: Path to the CSV produced by ``get_csv_annotate``.

    Returns:
        A ``matplotlib.figure.Figure`` with confidence distributions,
        per-class counts and detection timelines.
    """
    df = pd.read_csv(csv_annotations_path)
    # BUGFIX: the CSV may lack 'box_detected' (get_csv_annotate historically
    # never wrote it, making the per-frame plots fail with a KeyError).
    # Derive it from 'conf', which is NaN exactly on frames without detections.
    if 'box_detected' not in df.columns:
        df['box_detected'] = df['conf'].notna().astype(int)
    # rows that actually carry a detection (padding rows have NaN class_name)
    df_clean = df.dropna(subset=['class_name'])
    fig, axes = plt.subplots(7, 1, figsize=(10, 20), constrained_layout=True)
    sns.histplot(data=df_clean['conf'], kde=True, ax=axes[0])
    axes[0].set_title('Распределение уверенности детекций')
    axes[0].set_xlabel('Уверенность')
    axes[0].set_ylabel('Количество обнаружений')
    sns.boxplot(data=df_clean, x='class_name', y='conf', ax=axes[1])
    axes[1].set_title('Распределение уверенности детекции по классам')
    axes[1].set_xlabel('Класс объекта')
    axes[1].set_ylabel('Уверенность')
    # axes[1].tick_params(axis='x', labelrotation=45)
    sns.countplot(
        data=df_clean,
        x='class_name',
        hue='class_name',
        order=df_clean['class_name'].value_counts().index,
        palette='viridis',
        legend=False,
        ax=axes[2],
    )
    axes[2].set_title('Количество обнаружений объектов по классам')
    axes[2].set_xlabel('Класс объекта')
    axes[2].set_ylabel('Количество')
    # detections per frame number
    face_count_per_frame = df.groupby('frame_number')['box_detected'].sum()
    axes[3].plot(face_count_per_frame.index, face_count_per_frame.values, marker='o', linestyle='-')
    axes[3].set_title('Частота обнаружения объектов по кадрам')
    axes[3].set_xlabel('Номер кадра')
    axes[3].set_ylabel('Количество обнаруженных объектов')
    # detections per second of video
    face_count_per_frame = df.groupby('frame_sec')['box_detected'].sum()
    axes[4].plot(face_count_per_frame.index, face_count_per_frame.values, marker='o', linestyle='-')
    axes[4].set_title('Частота обнаружения объектов по секундам')
    axes[4].set_xlabel('Время (сек)')
    axes[4].set_ylabel('Количество обнаруженных объектов')
    sns.scatterplot(
        data=df_clean,
        x='frame_sec',
        y='class_name',
        hue='class_name',
        palette='deep',
        s=50,
        alpha=0.6,
        legend=True,
        ax=axes[5],
    )
    axes[5].set_title('Временная шкала обнаружения объектов по классам')
    axes[5].set_xlabel('Время видео (секунды)')
    axes[5].set_ylabel('Эмоция')
    axes[5].grid(True, linestyle='--', alpha=0.7)
    axes[5].legend(title='Классы объектов', bbox_to_anchor=(1.05, 1), loc='upper left')
    # stacked area: class counts over time
    emotion_timeline = df.pivot_table(index='frame_sec', columns='class_name', aggfunc='size', fill_value=0)
    emotion_timeline.plot(kind='area', stacked=True, ax=axes[6])
    axes[6].set_title('Динамика обнаружения классов во времени')
    axes[6].set_xlabel('Время видео (секунды)')
    axes[6].set_ylabel('Количество детекций')
    axes[6].grid(True, linestyle='--', alpha=0.7)
    axes[6].legend(title='Классы объектов', bbox_to_anchor=(1.05, 1), loc='upper left')
    return fig