Spaces:

marcuscanhaco
/

weapon-detection-app

Runtime error

App Files Community

Marcus Vinicius Zerbini Canhaço committed on Feb 13

Commit

ea56678

1 Parent(s): 87bc3e0

feat: atualização do detector com otimizações para GPU T4

Browse files

Files changed (3) hide show

README.md +1 -1
src/domain/detectors/gpu.py +41 -16
src/presentation/web/gradio_interface.py +20 -3

README.md CHANGED Viewed

@@ -19,7 +19,7 @@ app_port: 7860
 <div align="center">
-# FIAP VisionGuard - Weapon Detection
 *Sistema de Detecção de Armas e Objetos Perigosos*

 <div align="center">
+# FIAP VisionGuard - Risk Detection - Hackatoon 1IADT
 *Sistema de Detecção de Armas e Objetos Perigosos*

src/domain/detectors/gpu.py CHANGED Viewed

@@ -164,21 +164,21 @@ class WeaponDetectorGPU(BaseDetector):
             # Processar frames em batch
             t0 = time.time()
-            batch_size = 8  # Reduzido para evitar erros de memória
             detections_by_frame = []
             for i in range(0, len(frames), batch_size):
-                batch_frames = frames[i:i + batch_size]
-                batch_pil_frames = []
-                # Preparar batch
-                for frame in batch_frames:
-                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                    frame_pil = Image.fromarray(frame_rgb)
-                    frame_pil = self._preprocess_image(frame_pil)
-                    batch_pil_frames.append(frame_pil)
                 try:
                     # Processar batch
                     batch_inputs = self.owlv2_processor(
                         images=batch_pil_frames,
@@ -190,6 +190,11 @@ class WeaponDetectorGPU(BaseDetector):
                         for key, val in batch_inputs.items()
                     }
                     # Inferência em batch
                     with torch.no_grad():
                         inputs = {**batch_inputs, **self.processed_text}
@@ -222,18 +227,16 @@ class WeaponDetectorGPU(BaseDetector):
                                         "confidence": round(score_val * 100, 2),
                                         "box": [int(x) for x in box.tolist()],
                                         "label": label_text,
                                         "timestamp": (i + frame_idx) / (fps or 2)
                                     })
                             if frame_detections:
                                 frame_detections = self._apply_nms(frame_detections)
-                                detections_by_frame.append({
-                                    "frame": i + frame_idx,
-                                    "detections": frame_detections
-                                })
                 except RuntimeError as e:
-                    logger.error(f"Erro no processamento do batch: {str(e)}")
                     if "out of memory" in str(e):
                         torch.cuda.empty_cache()
                         gc.collect()
@@ -257,6 +260,28 @@ class WeaponDetectorGPU(BaseDetector):
             logger.error(f"Erro ao processar vídeo: {str(e)}")
             return video_path, metrics
     def _preprocess_image(self, image: Image.Image) -> Image.Image:
         """Pré-processa a imagem para o formato esperado pelo modelo."""
         try:

             # Processar frames em batch
             t0 = time.time()
+            batch_size = 4  # Reduzido para evitar erros de shape
             detections_by_frame = []
             for i in range(0, len(frames), batch_size):
                 try:
+                    batch_frames = frames[i:i + batch_size]
+                    batch_pil_frames = []
+                    # Preparar batch
+                    for frame in batch_frames:
+                        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                        frame_pil = Image.fromarray(frame_rgb)
+                        frame_pil = self._preprocess_image(frame_pil)
+                        batch_pil_frames.append(frame_pil)
                     # Processar batch
                     batch_inputs = self.owlv2_processor(
                         images=batch_pil_frames,
                         for key, val in batch_inputs.items()
                     }
+                    # Validar shapes antes da inferência
+                    if not self._validate_batch_shapes(batch_inputs):
+                        logger.warning(f"Shape inválido detectado no batch {i}, pulando...")
+                        continue
                     # Inferência em batch
                     with torch.no_grad():
                         inputs = {**batch_inputs, **self.processed_text}
                                         "confidence": round(score_val * 100, 2),
                                         "box": [int(x) for x in box.tolist()],
                                         "label": label_text,
+                                        "frame": i + frame_idx,
                                         "timestamp": (i + frame_idx) / (fps or 2)
                                     })
                             if frame_detections:
                                 frame_detections = self._apply_nms(frame_detections)
+                                detections_by_frame.extend(frame_detections)
                 except RuntimeError as e:
+                    logger.error(f"Erro no processamento do batch {i}: {str(e)}")
                     if "out of memory" in str(e):
                         torch.cuda.empty_cache()
                         gc.collect()
             logger.error(f"Erro ao processar vídeo: {str(e)}")
             return video_path, metrics
+    def _validate_batch_shapes(self, batch_inputs: Dict) -> bool:
+        """Check that the batch's ``pixel_values`` tensor is usable for inference.
+
+        Args:
+            batch_inputs: Mapping of model inputs for one batch; only the
+                ``"pixel_values"`` entry is inspected.
+
+        Returns:
+            ``True`` when ``pixel_values`` is present, its first dimension is
+            non-zero and it has exactly four dimensions. ``False`` otherwise,
+            including when any exception is raised while inspecting it (the
+            exception is logged, not propagated).
+        """
+        try:
+            pixel_values = batch_inputs.get("pixel_values")
+            # No image tensor in the batch: nothing to run inference on.
+            if pixel_values is None:
+                return False
+            # The first dimension is treated as the number of images in the batch.
+            batch_size = pixel_values.shape[0]
+            if batch_size == 0:
+                return False
+            # Validate the expected number of dimensions
+            expected_dims = 4  # [batch_size, channels, height, width]
+            if len(pixel_values.shape) != expected_dims:
+                return False
+            return True
+        except Exception as e:
+            # Any failure while inspecting the inputs is reported as "invalid"
+            # so the caller can skip the batch instead of crashing.
+            logger.error(f"Erro ao validar shapes do batch: {str(e)}")
+            return False
     def _preprocess_image(self, image: Image.Image) -> Image.Image:
         """Pré-processa a imagem para o formato esperado pelo modelo."""
         try:

src/presentation/web/gradio_interface.py CHANGED Viewed

@@ -127,7 +127,7 @@ class GradioInterface:
     def create_interface(self) -> gr.Blocks:
         """Cria a interface Gradio."""
-        title = "Detector de Riscos em Vídeos"
         sample_videos = self.list_sample_videos()
         with gr.Blocks(
@@ -135,7 +135,7 @@ class GradioInterface:
             theme=gr.themes.Ocean(),
             css="footer {display: none !important}"
         ) as demo:
-            gr.Markdown(f"""# 🚨 {title}
             Faça upload de um vídeo para detectar objetos perigosos.
             Opcionalmente, configure notificações para receber alertas em caso de detecções.
@@ -309,7 +309,24 @@ class GradioInterface:
                 status_html += f"<li>... e mais {len(response.detection_result.detections) - 5} detecção(ões)</li>"
             status_html += "</ul></div>"
         return (
             response.status_message,
-            status_html
         )

     def create_interface(self) -> gr.Blocks:
         """Cria a interface Gradio."""
+        title = "FIAP VisionGuard - Risk Detection - Hackatoon 1IADT"
         sample_videos = self.list_sample_videos()
         with gr.Blocks(
             theme=gr.themes.Ocean(),
             css="footer {display: none !important}"
         ) as demo:
+            gr.Markdown(f"""# 🎯 {title} 🔪🔫
             Faça upload de um vídeo para detectar objetos perigosos.
             Opcionalmente, configure notificações para receber alertas em caso de detecções.
                 status_html += f"<li>... e mais {len(response.detection_result.detections) - 5} detecção(ões)</li>"
             status_html += "</ul></div>"
+        # Preparar JSON técnico
+        technical_data = {
+            "device_type": response.detection_result.device_type,
+            "total_detections": len(response.detection_result.detections),
+            "frames_analyzed": response.detection_result.frames_analyzed,
+            "total_time": round(response.detection_result.total_time, 2),
+            "detections": [
+                {
+                    "label": det.label,
+                    "confidence": round(det.confidence * 100 if det.confidence <= 1.0 else det.confidence, 2),
+                    "frame": det.frame,
+                    "timestamp": round(det.timestamp, 2) if hasattr(det, "timestamp") else None
+                }
+                for det in response.detection_result.detections[:10]  # Limitar a 10 detecções no JSON
+            ]
+        }
         return (
             response.status_message,
+            technical_data  # Retorna dicionário Python em vez de HTML
         )