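"""CPU implementation of a weapon detector built on OWLv2 zero-shot object detection.

The model and processor are loaded once, the text queries are encoded a single
time and reused for every frame, and video processing stops at the first
detection found.
"""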
import gc
import logging
import os
import shutil
import subprocess
import tempfile
import time
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import cv2
import torch
from dotenv import load_dotenv
from PIL import Image
from tqdm import tqdm
from transformers import Owlv2Processor, Owlv2ForObjectDetection

from .base import BaseDetector, BaseCache

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

class CPUCache(BaseCache):
    """Result cache optimized for CPU execution."""

    def __init__(self, max_size: int = 1000):
        super().__init__(max_size)
        self.device = torch.device('cpu')


class WeaponDetectorCPU(BaseDetector):
    """CPU implementation of the weapon detector."""

    def __init__(self):
        """Initialize basic attributes."""
        super().__init__()
        self.default_resolution = 640
        self.device = torch.device('cpu')

    def _get_best_device(self):
        return torch.device('cpu')

    def _initialize(self):
        """Initialize the model and processor for CPU execution."""
        try:
            # Thread settings tuned for CPU inference
            torch.set_num_threads(min(8, os.cpu_count()))
            torch.set_num_interop_threads(min(8, os.cpu_count()))

            # Load the model, caching weights in a temporary directory
            cache_dir = os.path.join(tempfile.gettempdir(), 'weapon_detection_cache')
            os.makedirs(cache_dir, exist_ok=True)

            model_name = "google/owlv2-base-patch16"
            logger.info("Loading model and processor...")
            self.owlv2_processor = Owlv2Processor.from_pretrained(
                model_name,
                cache_dir=cache_dir
            )
            self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
                model_name,
                cache_dir=cache_dir,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True
            ).to(self.device)
            self.owlv2_model.eval()

            # Use the detection queries defined by the base class
            self.text_queries = self._get_detection_queries()
            logger.info(f"Total queries loaded: {len(self.text_queries)}")

            # Encode the text queries once; they are reused for every frame
            logger.info("Processing queries...")
            self.processed_text = self.owlv2_processor(
                text=self.text_queries,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            # Initialize the result cache
            cache_size = int(os.getenv('RESULT_CACHE_SIZE', '1000'))
            self.result_cache = CPUCache(max_size=cache_size)

            logger.info("CPU initialization complete!")
            self._initialized = True
        except Exception as e:
            logger.error(f"CPU initialization error: {str(e)}")
            raise

    def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
        """Apply non-maximum suppression (NMS) using CPU tensor operations."""
        try:
            if not detections:
                return []

            boxes = torch.tensor([d["box"] for d in detections])
            scores = torch.tensor([d["confidence"] for d in detections])
            labels = [d["label"] for d in detections]

            area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            _, order = scores.sort(descending=True)
            keep = []
            while order.numel() > 0:
                if order.numel() == 1:
                    keep.append(order.item())
                    break
                i = order[0]
                keep.append(i.item())

                # Intersection of the highest-scoring box with the remaining boxes
                xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
                yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
                xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
                yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])
                w = torch.clamp(xx2 - xx1, min=0)
                h = torch.clamp(yy2 - yy1, min=0)
                inter = w * h
                ovr = inter / (area[i] + area[order[1:]] - inter)

                # flatten() keeps this 1-D even when a single box survives,
                # where squeeze() would collapse it to a 0-dim tensor
                ids = (ovr <= iou_threshold).nonzero().flatten()
                if ids.numel() == 0:
                    break
                order = order[ids + 1]

            return [
                {
                    "confidence": scores[idx].item(),
                    "box": boxes[idx].tolist(),
                    "label": labels[idx]
                }
                for idx in keep
            ]
        except Exception as e:
            logger.error(f"Error applying NMS: {str(e)}")
            return []

    def _preprocess_image(self, image: Image.Image) -> Image.Image:
        """Resize the image to 640x640 (letterboxed) and ensure RGB mode."""
        try:
            target_size = (640, 640)
            if image.mode != 'RGB':
                image = image.convert('RGB')
            if image.size != target_size:
                # Scale to fit while preserving the aspect ratio
                ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
                new_size = tuple(int(dim * ratio) for dim in image.size)
                image = image.resize(new_size, Image.LANCZOS)
                if new_size != target_size:
                    # Pad with black borders, centering the resized image
                    new_image = Image.new('RGB', target_size, (0, 0, 0))
                    paste_x = (target_size[0] - new_size[0]) // 2
                    paste_y = (target_size[1] - new_size[1]) // 2
                    new_image.paste(image, (paste_x, paste_y))
                    image = new_image
            return image
        except Exception as e:
            logger.error(f"Preprocessing error: {str(e)}")
            return image

    def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
        """Detect objects in a single image on CPU."""
        try:
            image = self._preprocess_image(image)
            with torch.no_grad():
                image_inputs = self.owlv2_processor(
                    images=image,
                    return_tensors="pt"
                ).to(self.device)
                # Combine the image features with the pre-encoded text queries
                inputs = {**image_inputs, **self.processed_text}
                outputs = self.owlv2_model(**inputs)
                target_sizes = torch.tensor([image.size[::-1]])
                results = self.owlv2_processor.post_process_grounded_object_detection(
                    outputs=outputs,
                    target_sizes=target_sizes,
                    threshold=threshold
                )[0]

            detections = []
            for score, box, label in zip(results["scores"], results["boxes"], results["labels"]):
                x1, y1, x2, y2 = box.tolist()
                detections.append({
                    "confidence": score.item(),
                    "box": [int(x1), int(y1), int(x2), int(y2)],
                    "label": self.text_queries[label.item()]
                })
            return self._apply_nms(detections)
        except Exception as e:
            logger.error(f"Error in detect_objects: {str(e)}")
            return []

    def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
        """Process a video on CPU, stopping at the first detection found."""
        try:
            fps = fps or 2  # Default sampling rate when none is given
            metrics = {
                "total_time": 0,
                "frame_extraction_time": 0,
                "analysis_time": 0,
                "frames_analyzed": 0,
                "video_duration": 0,
                "device_type": self.device.type,
                "detections": [],
                "technical": {
                    "model": "owlv2-base-patch16",
                    "input_size": f"{resolution}x{resolution}",
                    "nms_threshold": 0.5,
                    "preprocessing": "basic",
                    "early_stop": True
                },
            }

            start_time = time.time()
            t0 = time.time()
            frames = self.extract_frames(video_path, fps, resolution)
            metrics["frame_extraction_time"] = time.time() - t0
            metrics["frames_analyzed"] = len(frames)
            if not frames:
                logger.warning("No frames extracted from the video")
                return video_path, metrics
            metrics["video_duration"] = len(frames) / fps

            t0 = time.time()
            detections = []
            frames_processed = 0
            # Process one frame at a time to limit memory use and allow early stopping
            for frame_idx, frame in enumerate(frames):
                frames_processed += 1
                # Convert the frame to RGB and preprocess it
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame_rgb)
                image = self._preprocess_image(image)

                # Run detection; the threshold is applied by the post-processor
                with torch.no_grad():
                    image_inputs = self.owlv2_processor(
                        images=image,
                        return_tensors="pt"
                    ).to(self.device)
                    inputs = {**image_inputs, **self.processed_text}
                    outputs = self.owlv2_model(**inputs)
                    target_sizes = torch.tensor([image.size[::-1]])
                    results = self.owlv2_processor.post_process_grounded_object_detection(
                        outputs=outputs,
                        target_sizes=target_sizes,
                        threshold=threshold
                    )[0]

                # Any score above the threshold counts as a detection
                if len(results["scores"]) > 0:
                    # Keep only the highest-confidence detection
                    max_score_idx = torch.argmax(results["scores"])
                    score = results["scores"][max_score_idx].item()
                    box = results["boxes"][max_score_idx].tolist()
                    label = results["labels"][max_score_idx].item()

                    detections.append({
                        "frame": frame_idx,
                        "confidence": score,
                        "box": [int(x) for x in box],
                        "label": self.text_queries[label]
                    })

                    # Update the metrics and stop processing
                    metrics["frames_processed_until_detection"] = frames_processed
                    metrics["analysis_time"] = time.time() - t0
                    metrics["total_time"] = time.time() - start_time
                    metrics["detections"] = detections
                    logger.info(f"Detection found after processing {frames_processed} frames")
                    return video_path, metrics

                # Release memory every 10 frames
                if frames_processed % 10 == 0:
                    gc.collect()

            # No detection was found in any frame
            metrics["analysis_time"] = time.time() - t0
            metrics["total_time"] = time.time() - start_time
            metrics["frames_processed_until_detection"] = frames_processed
            metrics["detections"] = detections
            return video_path, metrics
        except Exception as e:
            logger.error(f"Error processing video: {str(e)}")
            return video_path, {}

    def extract_frames(self, video_path: str, fps: int = 2, resolution: int = 480) -> list:
        """Extract frames from a video using ffmpeg."""
        frames = []
        temp_dir = Path(tempfile.mkdtemp())
        try:
            threads = min(os.cpu_count(), 4)  # Fewer threads for CPU-only hosts
            cmd = [
                'ffmpeg', '-i', video_path,
                '-threads', str(threads),
                '-vf', (f'fps={fps},'
                        f'scale={resolution}:{resolution}:force_original_aspect_ratio=decrease:flags=lanczos,'
                        f'pad={resolution}:{resolution}:(ow-iw)/2:(oh-ih)/2'),
                '-frame_pts', '1',
                f'{temp_dir}/%d.jpg'
            ]
            subprocess.run(cmd, check=True, capture_output=True)

            frame_files = sorted(temp_dir.glob('*.jpg'), key=lambda x: int(x.stem))
            chunk_size = 50  # Smaller chunk size for CPU
            with ThreadPoolExecutor(max_workers=threads) as executor:
                for i in range(0, len(frame_files), chunk_size):
                    chunk = frame_files[i:i + chunk_size]
                    chunk_frames = list(tqdm(
                        executor.map(lambda f: cv2.imread(str(f)), chunk),
                        desc=f"Loading frames {i+1}-{min(i+chunk_size, len(frame_files))}",
                        total=len(chunk)
                    ))
                    frames.extend(chunk_frames)
                    if i % (chunk_size * 5) == 0:
                        gc.collect()
        finally:
            shutil.rmtree(temp_dir)
        return frames

    def clear_cache(self):
        """Clear the result cache and free memory."""
        try:
            if hasattr(self, 'result_cache'):
                self.result_cache.clear()
            gc.collect()
            logger.info("CPU cache cleared successfully")
        except Exception as e:
            logger.error(f"Error clearing CPU cache: {str(e)}")
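

# --- Usage sketch (not part of the original module) ---
# A minimal, hypothetical example of driving the detector. It assumes that
# BaseDetector leaves model loading to an explicit _initialize() call and
# that the sample files below exist; both assumptions go beyond this file.
# Because of the relative import above, run it as a module, e.g.
# `python -m <package>.<this_module>`.
if __name__ == "__main__":
    detector = WeaponDetectorCPU()
    detector._initialize()

    # Single-image detection
    image = Image.open("sample.jpg")  # hypothetical input file
    print(detector.detect_objects(image, threshold=0.3))

    # Video processing; stops at the first detection found
    _, metrics = detector.process_video("sample.mp4", fps=2, threshold=0.3)
    print(metrics)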