import torch
from transformers import Owlv2Processor, Owlv2ForObjectDetection
from PIL import Image
import numpy as np
import cv2
import time
from typing import List, Dict, Tuple, Optional, Union
import os
from tqdm import tqdm
import json
from pathlib import Path
from contextlib import nullcontext
import threading
import hashlib
import pickle
from datetime import datetime
from dotenv import load_dotenv
import tempfile
import subprocess
import shutil
import traceback
import psutil
import logging
import gc
import sys
from concurrent.futures import ThreadPoolExecutor
import torch.nn.functional as F
from .base import BaseDetector, BaseCache


# Configurar logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Carregar variáveis de ambiente
load_dotenv()

class CPUCache(BaseCache):
    """Cache otimizado para CPU."""
    def __init__(self, max_size: int = 1000):
        super().__init__(max_size)
        self.device = torch.device('cpu')

class WeaponDetectorCPU(BaseDetector):
    """Implementação CPU do detector de armas."""
    def __init__(self):
        """Inicializa variáveis básicas."""
        super().__init__()
        self.default_resolution = 640
        self.device = torch.device('cpu')

    def _get_best_device(self):
        return torch.device('cpu')

    def _initialize(self):
        """Inicializa o modelo e o processador para execução em CPU."""
        try:
            # Configurações otimizadas para CPU
            torch.set_num_threads(min(8, os.cpu_count()))
            torch.set_num_interop_threads(min(8, os.cpu_count()))
            
            # Carregar modelo com configurações otimizadas
            cache_dir = os.path.join(tempfile.gettempdir(), 'weapon_detection_cache')
            os.makedirs(cache_dir, exist_ok=True)
            
            model_name = "google/owlv2-base-patch16"
            logger.info("Carregando modelo e processador...")
            
            self.owlv2_processor = Owlv2Processor.from_pretrained(
                model_name,
                cache_dir=cache_dir
            )

            self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
                model_name,
                cache_dir=cache_dir,
                torch_dtype=torch.float32,
                low_cpu_mem_usage=True
            ).to(self.device)

            self.owlv2_model.eval()

            # Usar queries do método base
            self.text_queries = self._get_detection_queries()
            logger.info(f"Total de queries carregadas: {len(self.text_queries)}")
            
            # Processar queries uma única vez
            logger.info("Processando queries...")
            self.processed_text = self.owlv2_processor(
                text=self.text_queries,
                return_tensors="pt",
                padding=True
            ).to(self.device)

            # Inicializar cache
            cache_size = int(os.getenv('RESULT_CACHE_SIZE', '1000'))
            self.result_cache = CPUCache(max_size=cache_size)

            logger.info("Inicialização CPU completa!")
            self._initialized = True

        except Exception as e:
            logger.error(f"Erro na inicialização CPU: {str(e)}")
            raise

    def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
        """Aplica NMS usando operações em CPU."""
        try:
            if not detections:
                return []

            boxes = torch.tensor([[d["box"][0], d["box"][1], d["box"][2], d["box"][3]] for d in detections])
            scores = torch.tensor([d["confidence"] for d in detections])
            labels = [d["label"] for d in detections]

            area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            _, order = scores.sort(descending=True)

            keep = []
            while order.numel() > 0:
                if order.numel() == 1:
                    keep.append(order.item())
                    break
                i = order[0]
                keep.append(i.item())

                xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
                yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
                xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
                yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])

                w = torch.clamp(xx2 - xx1, min=0)
                h = torch.clamp(yy2 - yy1, min=0)
                inter = w * h

                ovr = inter / (area[i] + area[order[1:]] - inter)
                ids = (ovr <= iou_threshold).nonzero().squeeze()
                if ids.numel() == 0:
                    break
                order = order[ids + 1]

            filtered_detections = []
            for idx in keep:
                filtered_detections.append({
                    "confidence": scores[idx].item(),
                    "box": boxes[idx].tolist(),
                    "label": labels[idx]
                })
            return filtered_detections

        except Exception as e:
            logger.error(f"Erro ao aplicar NMS: {str(e)}")
            return []

    def _preprocess_image(self, image: Image.Image) -> Image.Image:
        """Pré-processa a imagem para o tamanho 640x640 e garante RGB."""
        try:
            target_size = (640, 640)
            if image.mode != 'RGB':
                image = image.convert('RGB')
            if image.size != target_size:
                ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
                new_size = tuple(int(dim * ratio) for dim in image.size)
                image = image.resize(new_size, Image.LANCZOS)
                if new_size != target_size:
                    new_image = Image.new('RGB', target_size, (0, 0, 0))
                    paste_x = (target_size[0] - new_size[0]) // 2
                    paste_y = (target_size[1] - new_size[1]) // 2
                    new_image.paste(image, (paste_x, paste_y))
                    image = new_image
            return image
        except Exception as e:
            logger.error(f"Erro no pré-processamento: {str(e)}")
            return image

    def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
        """Detecta objetos em uma imagem utilizando CPU."""
        try:
            image = self._preprocess_image(image)
            with torch.no_grad():
                image_inputs = self.owlv2_processor(
                    images=image,
                    return_tensors="pt"
                ).to(self.device)
                inputs = {**image_inputs, **self.processed_text}
                outputs = self.owlv2_model(**inputs)

                target_sizes = torch.tensor([image.size[::-1]])
                results = self.owlv2_processor.post_process_grounded_object_detection(
                    outputs=outputs,
                    target_sizes=target_sizes,
                    threshold=threshold
                )[0]

                detections = []
                for score, box, label in zip(results["scores"], results["boxes"], results["labels"]):
                    x1, y1, x2, y2 = box.tolist()
                    detections.append({
                        "confidence": score.item(),
                        "box": [int(x1), int(y1), int(x2), int(y2)],
                        "label": self.text_queries[label]
                    })
                return self._apply_nms(detections)

        except Exception as e:
            logger.error(f"Erro em detect_objects: {str(e)}")
            return []

    def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
        """Processa um vídeo utilizando CPU. Para na primeira detecção encontrada."""
        try:
            metrics = {
                "total_time": 0,
                "frame_extraction_time": 0,
                "analysis_time": 0,
                "frames_analyzed": 0,
                "video_duration": 0,
                "device_type": self.device.type,
                "detections": [],
                "technical": {
                    "model": "owlv2-base-patch16-ensemble",
                    "input_size": f"{resolution}x{resolution}",
                    "nms_threshold": 0.5,
                    "preprocessing": "basic",
                    "early_stop": True
                },
            }

            start_time = time.time()
            t0 = time.time()
            frames = self.extract_frames(video_path, fps, resolution)
            metrics["frame_extraction_time"] = time.time() - t0
            metrics["frames_analyzed"] = len(frames)

            if not frames:
                logger.warning("Nenhum frame extraído do vídeo")
                return video_path, metrics

            metrics["video_duration"] = len(frames) / (fps or 2)
            t0 = time.time()
            detections = []
            frames_processed = 0

            # Processar um frame por vez para otimizar memória e permitir parada precoce
            for frame_idx, frame in enumerate(frames):
                frames_processed += 1
                
                # Converter frame para RGB e pré-processar
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame_rgb)
                image = self._preprocess_image(image)

                # Detectar objetos com threshold direto
                with torch.no_grad():
                    image_inputs = self.owlv2_processor(
                        images=image,
                        return_tensors="pt"
                    ).to(self.device)
                    inputs = {**image_inputs, **self.processed_text}
                    outputs = self.owlv2_model(**inputs)

                    target_sizes = torch.tensor([image.size[::-1]])
                    results = self.owlv2_processor.post_process_grounded_object_detection(
                        outputs=outputs,
                        target_sizes=target_sizes,
                        threshold=threshold  # Aplicar threshold diretamente
                    )[0]

                    # Se encontrou alguma detecção acima do threshold
                    if len(results["scores"]) > 0:
                        # Pegar a detecção com maior confiança
                        max_score_idx = torch.argmax(results["scores"])
                        score = results["scores"][max_score_idx].item()
                        box = results["boxes"][max_score_idx].tolist()
                        label = results["labels"][max_score_idx].item()

                        detections.append({
                            "frame": frame_idx,
                            "confidence": score,
                            "box": [int(x) for x in box],
                            "label": self.text_queries[label]
                        })

                        # Atualizar métricas e parar o processamento
                        metrics["frames_processed_until_detection"] = frames_processed
                        metrics["analysis_time"] = time.time() - t0
                        metrics["total_time"] = time.time() - start_time
                        metrics["detections"] = detections
                        logger.info(f"Detecção encontrada após processar {frames_processed} frames")
                        return video_path, metrics

                # Liberar memória a cada 10 frames
                if frames_processed % 10 == 0:
                    gc.collect()

            # Se chegou aqui, não encontrou nenhuma detecção
            metrics["analysis_time"] = time.time() - t0
            metrics["total_time"] = time.time() - start_time
            metrics["frames_processed_until_detection"] = frames_processed
            metrics["detections"] = detections
            return video_path, metrics

        except Exception as e:
            logger.error(f"Erro ao processar vídeo: {str(e)}")
            return video_path, {}

    def extract_frames(self, video_path: str, fps: int = 2, resolution: int = 480) -> list:
        """Extrai frames de um vídeo utilizando ffmpeg."""
        frames = []
        temp_dir = Path(tempfile.mkdtemp())
        try:
            threads = min(os.cpu_count(), 4)  # Menor número de threads para CPU
            cmd = [
                'ffmpeg', '-i', video_path,
                '-threads', str(threads),
                '-vf', (f'fps={fps},'
                        f'scale={resolution}:{resolution}:force_original_aspect_ratio=decrease:flags=lanczos,'
                        f'pad={resolution}:{resolution}:(ow-iw)/2:(oh-ih)/2'),
                '-frame_pts', '1',
                f'{temp_dir}/%d.jpg'
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            frame_files = sorted(temp_dir.glob('*.jpg'), key=lambda x: int(x.stem))
            chunk_size = 50  # Menor chunk size para CPU
            with ThreadPoolExecutor(max_workers=threads) as executor:
                for i in range(0, len(frame_files), chunk_size):
                    chunk = frame_files[i:i + chunk_size]
                    chunk_frames = list(tqdm(
                        executor.map(lambda f: cv2.imread(str(f)), chunk),
                        desc=f"Carregando frames {i+1}-{min(i+chunk_size, len(frame_files))}",
                        total=len(chunk)
                    ))
                    frames.extend(chunk_frames)
                    if i % (chunk_size * 5) == 0:
                        gc.collect()
        finally:
            shutil.rmtree(temp_dir)
        return frames

    def clear_cache(self):
        """Limpa o cache de resultados e libera memória."""
        try:
            if hasattr(self, 'result_cache'):
                self.result_cache.clear()
            gc.collect()
            logger.info("Cache CPU limpo com sucesso")
        except Exception as e:
            logger.error(f"Erro ao limpar cache CPU: {str(e)}")

    def _apply_nms(self, detections: list, iou_threshold: float = 0.5) -> list:
        """Aplica NMS usando operações em CPU."""
        try:
            if not detections:
                return []

            boxes = torch.tensor([[d["box"][0], d["box"][1], d["box"][2], d["box"][3]] for d in detections])
            scores = torch.tensor([d["confidence"] for d in detections])
            labels = [d["label"] for d in detections]

            area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            _, order = scores.sort(descending=True)

            keep = []
            while order.numel() > 0:
                if order.numel() == 1:
                    keep.append(order.item())
                    break
                i = order[0]
                keep.append(i.item())

                xx1 = torch.max(boxes[i, 0], boxes[order[1:], 0])
                yy1 = torch.max(boxes[i, 1], boxes[order[1:], 1])
                xx2 = torch.min(boxes[i, 2], boxes[order[1:], 2])
                yy2 = torch.min(boxes[i, 3], boxes[order[1:], 3])

                w = torch.clamp(xx2 - xx1, min=0)
                h = torch.clamp(yy2 - yy1, min=0)
                inter = w * h

                ovr = inter / (area[i] + area[order[1:]] - inter)
                ids = (ovr <= iou_threshold).nonzero().squeeze()
                if ids.numel() == 0:
                    break
                order = order[ids + 1]

            filtered_detections = []
            for idx in keep:
                filtered_detections.append({
                    "confidence": scores[idx].item(),
                    "box": boxes[idx].tolist(),
                    "label": labels[idx]
                })
            return filtered_detections

        except Exception as e:
            logger.error(f"Erro ao aplicar NMS: {str(e)}")
            return []

    def _preprocess_image(self, image: Image.Image) -> Image.Image:
        """Pré-processa a imagem para o tamanho 640x640 e garante RGB."""
        try:
            target_size = (640, 640)
            if image.mode != 'RGB':
                image = image.convert('RGB')
            if image.size != target_size:
                ratio = min(target_size[0] / image.size[0], target_size[1] / image.size[1])
                new_size = tuple(int(dim * ratio) for dim in image.size)
                image = image.resize(new_size, Image.LANCZOS)
                if new_size != target_size:
                    new_image = Image.new('RGB', target_size, (0, 0, 0))
                    paste_x = (target_size[0] - new_size[0]) // 2
                    paste_y = (target_size[1] - new_size[1]) // 2
                    new_image.paste(image, (paste_x, paste_y))
                    image = new_image
            return image
        except Exception as e:
            logger.error(f"Erro no pré-processamento: {str(e)}")
            return image

    def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
        """Detecta objetos em uma imagem utilizando CPU."""
        try:
            image = self._preprocess_image(image)
            with torch.no_grad():
                image_inputs = self.owlv2_processor(
                    images=image,
                    return_tensors="pt"
                ).to(self.device)
                inputs = {**image_inputs, **self.processed_text}
                outputs = self.owlv2_model(**inputs)

                target_sizes = torch.tensor([image.size[::-1]])
                results = self.owlv2_processor.post_process_grounded_object_detection(
                    outputs=outputs,
                    target_sizes=target_sizes,
                    threshold=threshold
                )[0]

                detections = []
                for score, box, label in zip(results["scores"], results["boxes"], results["labels"]):
                    x1, y1, x2, y2 = box.tolist()
                    detections.append({
                        "confidence": score.item(),
                        "box": [int(x1), int(y1), int(x2), int(y2)],
                        "label": self.text_queries[label]
                    })
                return self._apply_nms(detections)

        except Exception as e:
            logger.error(f"Erro em detect_objects: {str(e)}")
            return []

    def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
        """Processa um vídeo utilizando CPU. Para na primeira detecção encontrada."""
        try:
            metrics = {
                "total_time": 0,
                "frame_extraction_time": 0,
                "analysis_time": 0,
                "frames_analyzed": 0,
                "video_duration": 0,
                "device_type": self.device.type,
                "detections": [],
                "technical": {
                    "model": "owlv2-base-patch16-ensemble",
                    "input_size": f"{resolution}x{resolution}",
                    "nms_threshold": 0.5,
                    "preprocessing": "basic",
                    "early_stop": True
                },
            }

            start_time = time.time()
            t0 = time.time()
            frames = self.extract_frames(video_path, fps, resolution)
            metrics["frame_extraction_time"] = time.time() - t0
            metrics["frames_analyzed"] = len(frames)

            if not frames:
                logger.warning("Nenhum frame extraído do vídeo")
                return video_path, metrics

            metrics["video_duration"] = len(frames) / (fps or 2)
            t0 = time.time()
            detections = []
            frames_processed = 0

            # Processar um frame por vez para otimizar memória e permitir parada precoce
            for frame_idx, frame in enumerate(frames):
                frames_processed += 1
                
                # Converter frame para RGB e pré-processar
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(frame_rgb)
                image = self._preprocess_image(image)

                # Detectar objetos com threshold direto
                with torch.no_grad():
                    image_inputs = self.owlv2_processor(
                        images=image,
                        return_tensors="pt"
                    ).to(self.device)
                    inputs = {**image_inputs, **self.processed_text}
                    outputs = self.owlv2_model(**inputs)

                    target_sizes = torch.tensor([image.size[::-1]])
                    results = self.owlv2_processor.post_process_grounded_object_detection(
                        outputs=outputs,
                        target_sizes=target_sizes,
                        threshold=threshold  # Aplicar threshold diretamente
                    )[0]

                    # Se encontrou alguma detecção acima do threshold
                    if len(results["scores"]) > 0:
                        # Pegar a detecção com maior confiança
                        max_score_idx = torch.argmax(results["scores"])
                        score = results["scores"][max_score_idx].item()
                        box = results["boxes"][max_score_idx].tolist()
                        label = results["labels"][max_score_idx].item()

                        detections.append({
                            "frame": frame_idx,
                            "confidence": score,
                            "box": [int(x) for x in box],
                            "label": self.text_queries[label]
                        })

                        # Atualizar métricas e parar o processamento
                        metrics["frames_processed_until_detection"] = frames_processed
                        metrics["analysis_time"] = time.time() - t0
                        metrics["total_time"] = time.time() - start_time
                        metrics["detections"] = detections
                        logger.info(f"Detecção encontrada após processar {frames_processed} frames")
                        return video_path, metrics

                # Liberar memória a cada 10 frames
                if frames_processed % 10 == 0:
                    gc.collect()

            # Se chegou aqui, não encontrou nenhuma detecção
            metrics["analysis_time"] = time.time() - t0
            metrics["total_time"] = time.time() - start_time
            metrics["frames_processed_until_detection"] = frames_processed
            metrics["detections"] = detections
            return video_path, metrics

        except Exception as e:
            logger.error(f"Erro ao processar vídeo: {str(e)}")
            return video_path, {}

    def extract_frames(self, video_path: str, fps: int = 2, resolution: int = 480) -> list:
        """Extrai frames de um vídeo utilizando ffmpeg."""
        frames = []
        temp_dir = Path(tempfile.mkdtemp())
        try:
            threads = min(os.cpu_count(), 4)  # Menor número de threads para CPU
            cmd = [
                'ffmpeg', '-i', video_path,
                '-threads', str(threads),
                '-vf', (f'fps={fps},'
                        f'scale={resolution}:{resolution}:force_original_aspect_ratio=decrease:flags=lanczos,'
                        f'pad={resolution}:{resolution}:(ow-iw)/2:(oh-ih)/2'),
                '-frame_pts', '1',
                f'{temp_dir}/%d.jpg'
            ]
            subprocess.run(cmd, check=True, capture_output=True)
            frame_files = sorted(temp_dir.glob('*.jpg'), key=lambda x: int(x.stem))
            chunk_size = 50  # Menor chunk size para CPU
            with ThreadPoolExecutor(max_workers=threads) as executor:
                for i in range(0, len(frame_files), chunk_size):
                    chunk = frame_files[i:i + chunk_size]
                    chunk_frames = list(tqdm(
                        executor.map(lambda f: cv2.imread(str(f)), chunk),
                        desc=f"Carregando frames {i+1}-{min(i+chunk_size, len(frame_files))}",
                        total=len(chunk)
                    ))
                    frames.extend(chunk_frames)
                    if i % (chunk_size * 5) == 0:
                        gc.collect()
        finally:
            shutil.rmtree(temp_dir)
        return frames

    def clear_cache(self):
        """Limpa cache e libera memória."""
        self.result_cache.clear()
        gc.collect()