Marcus Vinicius Zerbini Canhaço committed
Commit a83ece3 · 1 Parent: 479ff3a

feat: update the detector with optimizations for the T4 GPU

Files changed (1):
  1. src/domain/detectors/gpu.py +60 -107
src/domain/detectors/gpu.py CHANGED
@@ -149,6 +149,10 @@ class WeaponDetectorGPU(BaseDetector):
         try:
             start_time = time.time()
 
+            # Clear the GPU cache before starting
+            torch.cuda.empty_cache()
+            gc.collect()
+
             # Extract frames
             t0 = time.time()
             frames = self.extract_frames(video_path, fps or 2, resolution)
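The two calls added above are the commit's core memory-hygiene move for the 16 GB T4: torch.cuda.empty_cache() returns cached allocator blocks to the driver (it does not free live tensors), and gc.collect() drops lingering Python references so their tensors can actually be released. A minimal sketch of the same pattern around a generic inference call; with_clean_gpu and run_inference are illustrative names, not part of the repo:

import gc

import torch

def with_clean_gpu(run_inference, *args, **kwargs):
    # Return cached blocks to the CUDA allocator; live tensors are unaffected.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    # Drop lingering Python references to tensors from previous iterations.
    gc.collect()
    # Inference only: skip autograd bookkeeping to save activation memory.
    with torch.no_grad():
        return run_inference(*args, **kwargs)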
@@ -164,115 +168,51 @@ class WeaponDetectorGPU(BaseDetector):
 
             # Process frames in batches
             t0 = time.time()
-            batch_size = 2  # Reduced even further to guarantee compatibility
+            batch_size = 1  # Process one frame at a time to guarantee compatibility
             detections_by_frame = []
 
-            for i in range(0, len(frames), batch_size):
+            # Pre-allocate tensors to avoid frequent allocations
+            with torch.cuda.device(self.device):
+                torch.cuda.empty_cache()  # Clear memory before starting
+
+            for i in range(0, len(frames)):
                 try:
-                    batch_frames = frames[i:i + batch_size]
-                    batch_pil_frames = []
-
-                    # Prepare the batch
-                    for frame in batch_frames:
+                    # Prepare the frame with memory optimizations
+                    frame = frames[i]
+                    if isinstance(frame, np.ndarray):
                         frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                         frame_pil = Image.fromarray(frame_rgb)
-                        frame_pil = self._preprocess_image(frame_pil)
-                        batch_pil_frames.append(frame_pil)
+                    else:
+                        frame_pil = frame
+                    frame_pil = self._preprocess_image(frame_pil)
 
-                    # Process the batch
-                    batch_inputs = self.owlv2_processor(
-                        images=batch_pil_frames,
-                        return_tensors="pt",
-                        padding=True
+                    # Process the frame
+                    inputs = self.owlv2_processor(
+                        images=frame_pil,
+                        return_tensors="pt"
                     )
-
-                    # Validate shapes before inference
-                    if not self._validate_batch_shapes(batch_inputs):
-                        logger.warning(f"Invalid shape detected in batch {i}, processing frames individually...")
-                        # Process frames individually
-                        for frame_idx, frame_pil in enumerate(batch_pil_frames):
-                            try:
-                                single_input = self.owlv2_processor(
-                                    images=frame_pil,
-                                    return_tensors="pt"
-                                )
-                                single_input = {
-                                    key: val.to(self.device)
-                                    for key, val in single_input.items()
-                                }
-
-                                with torch.no_grad():
-                                    inputs = {**single_input, **self.processed_text}
-                                    outputs = self.owlv2_model(**inputs)
-
-                                target_sizes = torch.tensor([frame_pil.size[::-1]], device=self.device)
-                                results = self.owlv2_processor.post_process_grounded_object_detection(
-                                    outputs=outputs,
-                                    target_sizes=target_sizes,
-                                    threshold=threshold
-                                )
-
-                                if len(results[0]["scores"]) > 0:
-                                    scores = results[0]["scores"]
-                                    boxes = results[0]["boxes"]
-                                    labels = results[0]["labels"]
-
-                                    frame_detections = []
-                                    for score, box, label in zip(scores, boxes, labels):
-                                        score_val = score.item()
-                                        if score_val >= threshold:
-                                            label_idx = min(label.item(), len(self.text_queries) - 1)
-                                            label_text = self.text_queries[label_idx]
-                                            frame_detections.append({
-                                                "confidence": round(score_val * 100, 2),
-                                                "box": [int(x) for x in box.tolist()],
-                                                "label": label_text,
-                                                "frame": i + frame_idx,
-                                                "timestamp": (i + frame_idx) / (fps or 2)
-                                            })
-
-                                    if frame_detections:
-                                        frame_detections = self._apply_nms(frame_detections)
-                                        detections_by_frame.extend(frame_detections)
-
-                            except Exception as e:
-                                logger.error(f"Error processing individual frame {i + frame_idx}: {str(e)}")
-                                continue
-
-                            finally:
-                                if 'single_input' in locals():
-                                    del single_input
-                                if 'outputs' in locals():
-                                    del outputs
-                                torch.cuda.empty_cache()
-                        continue
-
-                    # Process the batch normally
-                    batch_inputs = {
+                    inputs = {
                         key: val.to(self.device)
-                        for key, val in batch_inputs.items()
+                        for key, val in inputs.items()
                     }
 
+                    # Inference
                     with torch.no_grad():
-                        inputs = {**batch_inputs, **self.processed_text}
-                        outputs = self.owlv2_model(**inputs)
+                        model_inputs = {**inputs, **self.processed_text}
+                        outputs = self.owlv2_model(**model_inputs)
 
-                    target_sizes = torch.tensor(
-                        [frame.size[::-1] for frame in batch_pil_frames],
-                        device=self.device
-                    )
+                    target_sizes = torch.tensor([frame_pil.size[::-1]], device=self.device)
                     results = self.owlv2_processor.post_process_grounded_object_detection(
                         outputs=outputs,
                         target_sizes=target_sizes,
                         threshold=threshold
                     )
-
-                    # Process the batch results
-                    for frame_idx, frame_results in enumerate(results):
-                        if len(frame_results["scores"]) > 0:
-                            scores = frame_results["scores"]
-                            boxes = frame_results["boxes"]
-                            labels = frame_results["labels"]
+
+                    # Process the results
+                    if len(results[0]["scores"]) > 0:
+                        scores = results[0]["scores"]
+                        boxes = results[0]["boxes"]
+                        labels = results[0]["labels"]
 
                         frame_detections = []
                         for score, box, label in zip(scores, boxes, labels):
@@ -284,27 +224,29 @@ class WeaponDetectorGPU(BaseDetector):
                                     "confidence": round(score_val * 100, 2),
                                     "box": [int(x) for x in box.tolist()],
                                     "label": label_text,
-                                    "frame": i + frame_idx,
-                                    "timestamp": (i + frame_idx) / (fps or 2)
+                                    "frame": i,
+                                    "timestamp": i / (fps or 2)
                                 })
 
                         if frame_detections:
                             frame_detections = self._apply_nms(frame_detections)
                             detections_by_frame.extend(frame_detections)
 
-                except RuntimeError as e:
-                    logger.error(f"Error processing batch {i}: {str(e)}")
-                    if "out of memory" in str(e):
-                        torch.cuda.empty_cache()
-                        gc.collect()
+                except Exception as e:
+                    logger.error(f"Error processing frame {i}: {str(e)}")
                     continue
 
                 finally:
-                    # Free the batch memory
-                    del batch_inputs
+                    # Free memory
+                    if 'inputs' in locals():
+                        del inputs
                     if 'outputs' in locals():
                         del outputs
                     torch.cuda.empty_cache()
+
+                    # Progress log
+                    if i % 10 == 0:
+                        logger.info(f"Processed {i}/{len(frames)} frames")
 
             # Update the final metrics
             metrics["analysis_time"] = time.time() - t0
@@ -342,26 +284,37 @@ class WeaponDetectorGPU(BaseDetector):
     def _preprocess_image(self, image: Image.Image) -> Image.Image:
         """Preprocesses the image into the format expected by the model."""
         try:
-            # Convert to RGB if needed
+            # Size cache to avoid unnecessary resizing
+            if hasattr(self, '_last_size') and self._last_size == image.size:
+                return image
+
+            # Convert to RGB if needed, using direct conversion
             if image.mode != 'RGB':
                 image = image.convert('RGB')
 
-            # Resize, keeping the aspect ratio
+            # Resize, keeping the aspect ratio, with memory optimizations
             target_size = (self.default_resolution, self.default_resolution)
             if image.size != target_size:
+                # Compute the new size a single time
                 ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
                 new_size = tuple(int(dim * ratio) for dim in image.size)
-                image = image.resize(new_size, Image.Resampling.LANCZOS)
-
-                # Add padding if needed
-                if new_size != target_size:
+
+                # Resize straight to the final size when possible
+                if new_size == target_size:
+                    image = image.resize(target_size, Image.Resampling.BILINEAR)
+                else:
+                    # Build the padded image in a single operation
                     new_image = Image.new('RGB', target_size, (0, 0, 0))
+                    image = image.resize(new_size, Image.Resampling.BILINEAR)
                     paste_x = (target_size[0] - new_size[0]) // 2
                     paste_y = (target_size[1] - new_size[1]) // 2
                     new_image.paste(image, (paste_x, paste_y))
                     image = new_image
 
+            # Store the size for the cache
+            self._last_size = image.size
             return image
+
         except Exception as e:
             logger.error(f"Preprocessing error: {str(e)}")
             return image
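_preprocess_image implements a letterbox: scale the frame to fit while preserving the aspect ratio, then center it on a black square canvas so every frame reaches the model at the same resolution; the commit also swaps LANCZOS for the cheaper BILINEAR filter. The same idea as a standalone helper, with the side default standing in for self.default_resolution:

from PIL import Image

def letterbox(image: Image.Image, side: int = 960) -> Image.Image:
    """Scale to fit inside a side x side square, then pad with black borders."""
    image = image.convert("RGB")
    ratio = min(side / image.width, side / image.height)
    new_size = (int(image.width * ratio), int(image.height * ratio))
    # BILINEAR trades a little sharpness for speed, as the commit does.
    resized = image.resize(new_size, Image.Resampling.BILINEAR)
    canvas = Image.new("RGB", (side, side), (0, 0, 0))
    # Center the resized frame so the padding is symmetric on both axes.
    canvas.paste(resized, ((side - new_size[0]) // 2, (side - new_size[1]) // 2))
    return canvas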
 
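Both the removed and the added paths finish by passing each frame's detections through self._apply_nms before extending detections_by_frame. That helper is defined elsewhere in the file and is untouched by this commit; a plausible sketch of what such a step does, built on torchvision's NMS over the detection dicts used here:

import torch
from torchvision.ops import nms

def apply_nms(detections, iou_threshold=0.5):
    """Keep only the highest-confidence box among heavily overlapping ones."""
    # Hypothetical stand-in for self._apply_nms; the real body is not in this diff.
    if not detections:
        return detections
    # Boxes are [x1, y1, x2, y2]; confidences are the percentages built above.
    boxes = torch.tensor([d["box"] for d in detections], dtype=torch.float32)
    scores = torch.tensor([d["confidence"] for d in detections], dtype=torch.float32)
    keep = nms(boxes, scores, iou_threshold)  # indices of the surviving boxes
    return [detections[i] for i in keep.tolist()]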