import gradio as gr
from gradio_client import Client, handle_file
from PIL import Image, ImageFilter
import numpy as np
import os
import time
import logging
import io
import collections
import onnxruntime
import json
from huggingface_hub import CommitScheduler, hf_hub_download, snapshot_download
from dotenv import load_dotenv
import concurrent.futures
import ast
import torch

from utils.utils import softmax, augment_image
from forensics.gradient import gradient_processing
from forensics.minmax import minmax_process
from forensics.ela import ELA
from forensics.wavelet import noise_estimation
from forensics.bitplane import bit_plane_extractor
from utils.hf_logger import log_inference_data
from utils.load import load_image
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
from agents.ensemble_weights import ModelWeightManager
from transformers import (
    pipeline,
    AutoImageProcessor,
    SwinForImageClassification,
    Swinv2ForImageClassification,
    AutoFeatureExtractor,
    AutoModelForImageClassification,
)
from torchvision import transforms

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

os.environ['HF_HUB_CACHE'] = './models'


# --- Gradio Log Handler ---
class GradioLogHandler(logging.Handler):
    """Logging handler that appends formatted records to a shared deque for the UI."""
    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue
        self.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

    def emit(self, record):
        self.log_queue.append(self.format(record))

log_queue = collections.deque(maxlen=1000)  # Store the last 1000 log messages
gradio_handler = GradioLogHandler(log_queue)

# Attach the handler to the root logger so messages from the agents are captured too
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(gradio_handler)
# --- End Gradio Log Handler ---

LOCAL_LOG_DIR = "./hf_inference_logs"
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
load_dotenv()


# Custom JSON encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.float32):
            return float(obj)
        return json.JSONEncoder.default(self, obj)


# Use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model paths and class names (copied from app_mcp.py)
MODEL_PATHS = {
    "model_1": "LPX55/detection-model-1-ONNX",
    "model_2": "LPX55/detection-model-2-ONNX",
    "model_3": "LPX55/detection-model-3-ONNX",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "LPX55/detection-model-5-ONNX",
    "model_6": "LPX55/detection-model-6-ONNX",
    "model_7": "LPX55/detection-model-7-ONNX",
    "model_8": "aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT",
}

CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['artificial', 'human'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Realism', 'Deepfake'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
    "model_8": ['Fake', 'Real'],
}


def preprocess_resize_256(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((256, 256))(image)


def preprocess_resize_224(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((224, 224))(image)


def postprocess_pipeline(prediction, class_names):
    # Assumes HuggingFace pipeline output: a list of {'label': ..., 'score': ...} dicts
    return {pred['label']: float(pred['score']) for pred in prediction}
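
# Hedged sketch of the shared postprocess contract: every postprocess_* helper in
# this module maps a raw model output plus class names to a {label: probability}
# dict. The sample pipeline output below is hypothetical.
#
# >>> postprocess_pipeline(
# ...     [{'label': 'AI Image', 'score': 0.91}, {'label': 'Real Image', 'score': 0.09}],
# ...     CLASS_NAMES["model_2"])
# {'AI Image': 0.91, 'Real Image': 0.09}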

def postprocess_logits(outputs, class_names):
    # Assumes a HuggingFace model output object exposing .logits
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}


def postprocess_binary_output(output, class_names):
    # `output` can be a dictionary {"probabilities": numpy_array} or a bare numpy array
    probabilities_array = None
    if isinstance(output, dict) and "probabilities" in output:
        probabilities_array = output["probabilities"]
    elif isinstance(output, np.ndarray):
        probabilities_array = output
    else:
        logger.warning(f"Unexpected output type for binary post-processing: {type(output)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return {class_names[0]: 0.0, class_names[1]: 1.0}

    logger.info(f"Debug: Probabilities array entering postprocess_binary_output: {probabilities_array}, type: {type(probabilities_array)}, shape: {probabilities_array.shape}")

    if probabilities_array is None:
        logger.warning("Probabilities array is None after extracting from output. Returning default scores.")
        return {class_names[0]: 0.0, class_names[1]: 1.0}

    if probabilities_array.size == 1:
        fake_prob = float(probabilities_array.item())
    elif probabilities_array.size == 2:
        fake_prob = float(probabilities_array[0])
    else:
        logger.warning(f"Unexpected probabilities array shape for binary post-processing: {probabilities_array.shape}. Expected size 1 or 2.")
        return {class_names[0]: 0.0, class_names[1]: 1.0}

    real_prob = 1.0 - fake_prob  # Ensure Fake and Real sum to 1
    return {class_names[0]: fake_prob, class_names[1]: real_prob}


def infer_gradio_api(image_path):
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    result_dict = client.predict(
        input_image=handle_file(image_path),
        api_name="/simple_predict"
    )
    logger.info(f"Debug: Raw result_dict from Gradio API (model_8): {result_dict}, type: {type(result_dict)}")
    # result_dict is already a dictionary, no need for ast.literal_eval
    fake_probability = result_dict.get('Fake Probability', 0.0)
    logger.info(f"Debug: Parsed result_dict: {result_dict}, Extracted fake_probability: {fake_probability}")
    return {"probabilities": np.array([fake_probability])}  # Return as a one-element numpy array


# New preprocess function for the Gradio API
def preprocess_gradio_api(image: Image.Image):
    # The Gradio API expects a file path, so save the PIL image to a temporary file.
    temp_file_path = "./temp_gradio_input.png"
    image.save(temp_file_path)
    return temp_file_path
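
# Hedged sketch of the model_8 round trip: preprocess_gradio_api persists the PIL
# image to a temp file, infer_gradio_api posts it to the hosted Space, and
# postprocess_gradio_api (below) expands the single fake probability into a
# two-class score dict. The solid-color input image is purely illustrative.
#
# >>> path = preprocess_gradio_api(Image.new("RGB", (224, 224), "gray"))
# >>> raw = infer_gradio_api(path)                         # {"probabilities": np.array([p])}
# >>> postprocess_gradio_api(raw, CLASS_NAMES["model_8"])  # {'Fake': p, 'Real': 1.0 - p}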

# New postprocess function for the Gradio API (adapting postprocess_binary_output)
def postprocess_gradio_api(gradio_output, class_names):
    # gradio_output is expected to be a dictionary like {"probabilities": np.array([fake_prob])}
    probabilities_array = None
    if isinstance(gradio_output, dict) and "probabilities" in gradio_output:
        probabilities_array = gradio_output["probabilities"]
    elif isinstance(gradio_output, np.ndarray):
        probabilities_array = gradio_output
    else:
        logger.warning(f"Unexpected output type for Gradio API post-processing: {type(gradio_output)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return {class_names[0]: 0.0, class_names[1]: 1.0}

    logger.info(f"Debug: Probabilities array entering postprocess_gradio_api: {probabilities_array}, type: {type(probabilities_array)}, shape: {probabilities_array.shape}")

    if probabilities_array is None or probabilities_array.size == 0:
        logger.warning("Probabilities array is None or empty after extracting from Gradio API output. Returning default scores.")
        return {class_names[0]: 0.0, class_names[1]: 1.0}

    # It should always be a single-element array holding the fake probability
    fake_prob = float(probabilities_array.item())
    real_prob = 1.0 - fake_prob
    return {class_names[0]: fake_prob, class_names[1]: real_prob}


def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names,
                                 display_name, contributor, model_path,
                                 architecture=None, dataset=None):
    entry = ModelEntry(model, preprocess, postprocess, class_names,
                       display_name=display_name, contributor=contributor,
                       model_path=model_path, architecture=architecture, dataset=dataset)
    MODEL_REGISTRY[model_id] = entry


def load_onnx_model_and_preprocessor(hf_model_id):
    # model_dir = snapshot_download(repo_id=hf_model_id, local_dir_use_symlinks=False)
    # Create a unique local directory for each ONNX model
    model_specific_dir = os.path.join("./models", hf_model_id.replace('/', '_'))
    os.makedirs(model_specific_dir, exist_ok=True)

    # Use hf_hub_download to fetch specific files into the model-specific directory
    onnx_model_path = hf_hub_download(repo_id=hf_model_id, filename="model_quantized.onnx",
                                      subfolder="onnx", local_dir=model_specific_dir,
                                      local_dir_use_symlinks=False)

    # Load preprocessor config
    preprocessor_config = {}
    try:
        preprocessor_config_path = hf_hub_download(repo_id=hf_model_id, filename="preprocessor_config.json",
                                                   local_dir=model_specific_dir, local_dir_use_symlinks=False)
        with open(preprocessor_config_path, 'r') as f:
            preprocessor_config = json.load(f)
    except Exception as e:
        logger.warning(f"Could not download or load preprocessor_config.json for {hf_model_id}: {e}")

    # Load model config for class names, if available
    model_config = {}
    try:
        model_config_path = hf_hub_download(repo_id=hf_model_id, filename="config.json",
                                            local_dir=model_specific_dir, local_dir_use_symlinks=False)
        with open(model_config_path, 'r') as f:
            model_config = json.load(f)
    except Exception as e:
        logger.warning(f"Could not download or load config.json for {hf_model_id}: {e}")

    return onnxruntime.InferenceSession(onnx_model_path), preprocessor_config, model_config


# Cache for ONNX sessions and preprocessors
_onnx_model_cache = {}

def get_onnx_model_from_cache(hf_model_id):
    if hf_model_id not in _onnx_model_cache:
        logger.info(f"Loading ONNX model and preprocessor for {hf_model_id}...")
        _onnx_model_cache[hf_model_id] = load_onnx_model_and_preprocessor(hf_model_id)
    return _onnx_model_cache[hf_model_id]


def preprocess_onnx_input(image: Image.Image, preprocessor_config: dict):
    # Preprocess an image for an ONNX model based on its preprocessor_config
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Get image size and normalization values from preprocessor_config, or use ImageNet defaults.
    # 'size' drives the initial resize; 'crop_size' drives the center crop.
    initial_resize_size = preprocessor_config.get('size', {'height': 224, 'width': 224})
    crop_size = preprocessor_config.get('crop_size', initial_resize_size['height'])
    mean = preprocessor_config.get('image_mean', [0.485, 0.456, 0.406])
    std = preprocessor_config.get('image_std', [0.229, 0.224, 0.225])

    transform = transforms.Compose([
        transforms.Resize((initial_resize_size['height'], initial_resize_size['width'])),
        transforms.CenterCrop(crop_size),  # Apply center crop
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ])
    input_tensor = transform(image)
    # ONNX expects a numpy array with a batch dimension: (1, C, H, W)
    return input_tensor.unsqueeze(0).cpu().numpy()
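
# Hedged sketch, assuming an empty preprocessor_config: the defaults above resize
# to 224x224, center-crop to 224, and normalize with ImageNet statistics, so any
# input image becomes a float32 array in the (batch, channels, height, width)
# layout the ONNX sessions expect.
#
# >>> x = preprocess_onnx_input(Image.new("RGB", (512, 384)), {})
# >>> x.shape
# (1, 3, 224, 224)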

def infer_onnx_model(hf_model_id, preprocessed_image_np, model_config: dict):
    try:
        ort_session, _, _ = get_onnx_model_from_cache(hf_model_id)

        # Debug: print the expected input shape from the ONNX model
        for input_meta in ort_session.get_inputs():
            logger.info(f"Debug: ONNX model expected input name: {input_meta.name}, shape: {input_meta.shape}, type: {input_meta.type}")
        logger.info(f"Debug: preprocessed_image_np shape: {preprocessed_image_np.shape}")

        ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
        ort_outputs = ort_session.run(None, ort_inputs)

        logits = ort_outputs[0]
        logger.info(f"Debug: logits type: {type(logits)}, shape: {logits.shape}")

        # If the model outputs a single logit (e.g., shape (1,)), .item() would convert it to a
        # scalar; otherwise assume a batch of logits (e.g., shape (1, num_classes)) and take the
        # first element (the batch dim). The num_classes in config.json can be misleading; rely
        # on the actual output shape.
        # Apply softmax to the logits to get class probabilities; the softmax in utils/utils.py
        # ensures a list of floats.
        probabilities = softmax(logits[0])  # logits[0] is the relevant output for a single prediction

        return {"logits": logits, "probabilities": probabilities}
    except Exception as e:
        logger.error(f"Error during ONNX inference for {hf_model_id}: {e}")
        # Return a structure consistent with other model errors
        return {"logits": np.array([]), "probabilities": np.array([])}


def postprocess_onnx_output(onnx_output, model_config):
    # Derive class names from model_config: prefer id2label, then num_classes, otherwise
    # fall back to a Fake/Real pair.
    class_names_map = model_config.get('id2label')
    if class_names_map:
        class_names = [class_names_map[k] for k in sorted(class_names_map.keys())]
    elif model_config.get('num_classes') == 1:
        # Models that output a single value: assume it is the probability of 'Fake'
        class_names = ['Fake', 'Real']
    else:
        class_names = ['Fake', 'Real']  # Default when no class names are found

    probabilities = onnx_output.get("probabilities")
    if probabilities is not None:
        if model_config.get('num_classes') == 1 and len(probabilities) == 2:
            # Special handling for single-output models: the pair is (Fake, Real)
            fake_prob = float(probabilities[0])
            real_prob = float(probabilities[1])
            return {class_names[0]: fake_prob, class_names[1]: real_prob}
        elif len(probabilities) == len(class_names):
            return {class_names[i]: float(probabilities[i]) for i in range(len(class_names))}
        else:
            logger.warning("ONNX post-processing: probabilities length mismatch with class names.")
            return {name: 0.0 for name in class_names}
    else:
        logger.warning("ONNX post-processing failed: 'probabilities' key not found in output.")
        return {name: 0.0 for name in class_names}


# Register the ONNX quantized models through a wrapper that handles dynamic loading,
# so each model is only pulled from the Hub when it is first used.
class ONNXModelWrapper:
    def __init__(self, hf_model_id):
        self.hf_model_id = hf_model_id
        self._session = None
        self._preprocessor_config = None
        self._model_config = None

    def load(self):
        if self._session is None:
            self._session, self._preprocessor_config, self._model_config = get_onnx_model_from_cache(self.hf_model_id)
            logger.info(f"ONNX model {self.hf_model_id} loaded into wrapper.")

    def __call__(self, image_np):
        self.load()  # Ensure the model is loaded on first call
        # Pass model_config to infer_onnx_model
        return infer_onnx_model(self.hf_model_id, image_np, self._model_config)

    def preprocess(self, image: Image.Image):
        self.load()
        return preprocess_onnx_input(image, self._preprocessor_config)

    def postprocess(self, onnx_output: dict, class_names_from_registry: list):
        # class_names_from_registry is ignored; the model's own config takes precedence
        self.load()
        return postprocess_onnx_output(onnx_output, self._model_config)
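
# Hedged usage sketch: the wrapper defers hf_hub_download and session creation
# until the first call, so registering all six ONNX models in the loop below
# stays cheap at import time. `img` stands in for any PIL image; the label names
# in the result depend on each model's config.json.
#
# >>> wrapper = ONNXModelWrapper("LPX55/detection-model-1-ONNX")
# >>> tensor = wrapper.preprocess(img)                    # first use triggers load()
# >>> scores = wrapper.postprocess(wrapper(tensor), [])   # e.g. {'Fake': ..., 'Real': ...}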

# Consolidate all model loading and registration
for model_key, hf_model_path in MODEL_PATHS.items():
    logger.debug(f"Attempting to register model: {model_key} with path: {hf_model_path}")
    model_num = model_key.replace("model_", "").upper()
    contributor = "Unknown"
    architecture = "Unknown"
    dataset = "TBA"
    current_class_names = CLASS_NAMES.get(model_key, [])

    # Logic for ONNX models (1, 2, 3, 5, 6, 7)
    if "ONNX" in hf_model_path:
        logger.debug(f"Model {model_key} identified as ONNX.")
        logger.info(f"Registering ONNX model: {model_key} from {hf_model_path}")
        onnx_wrapper_instance = ONNXModelWrapper(hf_model_path)

        # Derive contributor, architecture, and dataset from the model key
        if model_key == "model_1":
            contributor = "haywoodsloan"
            architecture = "SwinV2"
            dataset = "DeepFakeDetection"
        elif model_key == "model_2":
            contributor = "Heem2"
            architecture = "ViT"
            dataset = "DeepFakeDetection"
        elif model_key == "model_3":
            contributor = "Organika"
            architecture = "VIT"
            dataset = "SDXL"
        elif model_key == "model_5":
            contributor = "prithivMLmods"
            architecture = "VIT"
        elif model_key == "model_6":
            contributor = "ideepankarsharma2003"
            architecture = "SWINv1"
            dataset = "SDXL, Midjourney"
        elif model_key == "model_7":
            contributor = "date3k2"
            architecture = "VIT"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)
        display_name += "_ONNX"  # Always append _ONNX for ONNX models

        register_model_with_metadata(
            model_id=model_key,
            model=onnx_wrapper_instance,  # The callable wrapper for the ONNX model
            preprocess=onnx_wrapper_instance.preprocess,
            postprocess=onnx_wrapper_instance.postprocess,
            class_names=current_class_names,  # Initial class names; overridden by model_config if available
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )

    # Logic for the Gradio API model (model_8)
    elif model_key == "model_8":
        logger.debug(f"Model {model_key} identified as Gradio API.")
        logger.info(f"Registering Gradio API model: {model_key} from {hf_model_path}")
        contributor = "aiwithoutborders-xyz"
        architecture = "ViT"
        dataset = "DeepfakeDetection"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)

        register_model_with_metadata(
            model_id=model_key,
            model=infer_gradio_api,
            preprocess=preprocess_gradio_api,
            postprocess=postprocess_gradio_api,
            class_names=current_class_names,
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )
    # Logic for PyTorch/Hugging Face pipeline models (currently only model_4)
    elif model_key == "model_4":  # Explicitly handle model_4
        logger.debug(f"Model {model_key} identified as PyTorch/HuggingFace pipeline.")
        logger.info(f"Registering HuggingFace pipeline/AutoModel: {model_key} from {hf_model_path}")
        contributor = "cmckinle"
        architecture = "VIT"
        dataset = "SDXL, FLUX"

        display_name_parts = [model_num]
        if architecture and architecture not in ["Unknown"]:
            display_name_parts.append(architecture)
        if dataset and dataset not in ["TBA"]:
            display_name_parts.append(dataset)
        display_name = "-".join(display_name_parts)

        current_processor = AutoFeatureExtractor.from_pretrained(hf_model_path, device=device)
        model_instance = AutoModelForImageClassification.from_pretrained(hf_model_path).to(device)
        preprocess_func = preprocess_resize_256
        postprocess_func = postprocess_logits

        # Bind the processor and model as default arguments so the closure keeps its own pair
        def custom_infer(image, processor_local=current_processor, model_local=model_instance):
            inputs = processor_local(image, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = model_local(**inputs)
            return outputs

        register_model_with_metadata(
            model_id=model_key,
            model=custom_infer,
            preprocess=preprocess_func,
            postprocess=postprocess_func,
            class_names=current_class_names,
            display_name=display_name,
            contributor=contributor,
            model_path=hf_model_path,
            architecture=architecture,
            dataset=dataset
        )

    else:
        # Fallback for any unhandled models (shouldn't happen if MODEL_PATHS is fully covered)
        logger.warning(f"Could not automatically load and register model: {model_key} from {hf_model_path}. No matching registration logic found.")


def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Predict using a specific model.

    Args:
        image (Image.Image): The input image to classify.
        model_id (str): The ID of the model to use for classification.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    entry = MODEL_REGISTRY[model_id]
    img = entry.preprocess(image) if entry.preprocess else image
    try:
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)

        def _to_float_scalar(value):
            if isinstance(value, np.ndarray):
                return float(value.item())  # Convert a numpy scalar array to a Python float
            return float(value)  # Already a Python scalar or convertible type

        ai_score = _to_float_scalar(scores.get(entry.class_names[0], 0.0))
        real_score = _to_float_scalar(scores.get(entry.class_names[1], 0.0))
        label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": ai_score,
            "Real Score": real_score,
            "Label": label
        }
    except Exception as e:
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": 0.0,
            "Real Score": 0.0,
            "Label": f"Error: {str(e)}"
        }
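
# Minimal usage sketch for a single-model call; "sample.jpg" is a hypothetical
# path, and the call assumes the registry was populated by the loop above.
#
# >>> result = infer(Image.open("sample.jpg"), "model_1", confidence_threshold=0.75)
# >>> result["Label"]   # "AI", "REAL", or "UNCERTAIN" (or "Error: ..." on failure)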

def full_prediction(img, confidence_threshold, rotate_degrees, noise_level, sharpen_strength):
    """Full prediction run, with a team of ensembles and agents.

    Args:
        img (url: str, Image.Image, np.ndarray): The input image to classify.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.
        rotate_degrees (int, optional): The degrees to rotate the image.
        noise_level (int, optional): The noise level to use.
        sharpen_strength (int, optional): The sharpen strength to use.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    # Ensure img is a PIL Image object
    if img is None:
        raise gr.Error("No image provided. Please upload an image to analyze.")

    # Handle filepath conversion if needed
    if isinstance(img, str):
        try:
            img = load_image(img)
        except Exception as e:
            logger.error(f"Error loading image from path: {e}")
            raise gr.Error(f"Could not load image from the provided path. Error: {str(e)}")

    if not isinstance(img, Image.Image):
        try:
            img = Image.fromarray(img)
        except Exception as e:
            logger.error(f"Error converting input image to PIL: {e}")
            raise gr.Error("Input image could not be converted to a valid image format. Please try another image.")

    # Ensure the image is in RGB format for consistent processing
    if img.mode != 'RGB':
        img = img.convert('RGB')

    monitor_agent = EnsembleMonitorAgent()
    weight_manager = ModelWeightManager(strongest_model_id="simple_prediction")
    optimization_agent = WeightOptimizationAgent(weight_manager)
    health_agent = SystemHealthAgent()
    context_agent = ContextualIntelligenceAgent()
    anomaly_agent = ForensicAnomalyDetectionAgent()

    health_agent.monitor_system_health()

    if rotate_degrees or noise_level or sharpen_strength:
        img_pil, _ = augment_image(img, ["rotate", "add_noise", "sharpen"], rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_np_og = np.array(img)

    model_predictions_raw = {}
    confidence_scores = {}
    results = []
    table_rows = []

    # Initialize lists for forensic outputs, starting with the original augmented image
    cleaned_forensics_images = []
    forensic_output_descriptions = []

    # Always add the original augmented image first for the forensic display
    if isinstance(img_pil, Image.Image):
        cleaned_forensics_images.append(img_pil)
        forensic_output_descriptions.append(f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}")
    elif isinstance(img_pil, np.ndarray):
        try:
            pil_img_from_np = Image.fromarray(img_pil)
            cleaned_forensics_images.append(pil_img_from_np)
            forensic_output_descriptions.append(f"Original augmented image (numpy converted to PIL): {pil_img_from_np.width}x{pil_img_from_np.height}")
        except Exception as e:
            logger.warning(f"Could not convert original numpy image to PIL for gallery: {e}")

    # Yield the initial state with the augmented image and empty model predictions
    yield img_pil, cleaned_forensics_images, table_rows, "[]", "