import gradio as gr
from gradio_client import Client, handle_file
from PIL import Image, ImageFilter
import numpy as np
import os
import time
import logging
import io
import collections
import onnxruntime

from utils.utils import softmax, augment_image
from forensics.gradient import gradient_processing
from forensics.minmax import minmax_process
from forensics.ela import ELA
from forensics.wavelet import noise_estimation
from forensics.bitplane import bit_plane_extractor
from utils.hf_logger import log_inference_data
from utils.load import load_image
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
from agents.ensemble_weights import ModelWeightManager
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
from torchvision import transforms
import torch
import json
from huggingface_hub import CommitScheduler
from dotenv import load_dotenv

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

os.environ['HF_HUB_CACHE'] = './models'


# --- Gradio Log Handler ---
class GradioLogHandler(logging.Handler):
    def __init__(self, log_queue):
        super().__init__()
        self.log_queue = log_queue
        self.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

    def emit(self, record):
        self.log_queue.append(self.format(record))

log_queue = collections.deque(maxlen=1000)  # Store the last 1000 log messages
gradio_handler = GradioLogHandler(log_queue)

# Attach the handler to the root logger (at INFO) so messages from the agents are captured
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(gradio_handler)
# --- End Gradio Log Handler ---

LOCAL_LOG_DIR = "./hf_inference_logs"
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
load_dotenv()

# Custom JSON encoder to handle numpy types, e.g. json.dumps(obj, cls=NumpyEncoder)
class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.float32):
            return float(obj)
        return json.JSONEncoder.default(self, obj)

# Use the GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model paths and class names (copied from app_mcp.py)
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4"
}

# NOTE: infer() treats class_names[0] as the AI/fake class and class_names[1]
# as the real class; verify that each model's label order matches that convention.
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Realism', 'Deepfake'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}

def preprocess_resize_256(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((256, 256))(image)

def preprocess_resize_224(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((224, 224))(image)

def postprocess_pipeline(prediction, class_names):
    # Assumes HuggingFace pipeline output: a list of {'label': ..., 'score': ...} dicts
    return {pred['label']: pred['score'] for pred in prediction}

def postprocess_logits(outputs, class_names):
    # Assumes model output with logits
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}
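# utils.utils.softmax is assumed to behave like the numerically stable
# reference below (an illustrative sketch, not this project's implementation):
#
#   def softmax(x: np.ndarray) -> np.ndarray:
#       e = np.exp(x - np.max(x))  # subtract the max for numerical stability
#       return e / e.sum()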
def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path, architecture=None, dataset=None):
    entry = ModelEntry(model, preprocess, postprocess, class_names, display_name=display_name, contributor=contributor, model_path=model_path, architecture=architecture, dataset=dataset)
    MODEL_REGISTRY[model_id] = entry

# Load and register models (copied from app_mcp.py)
# image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
# model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
# clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
# register_model_with_metadata(
#     "model_1", clf_1, preprocess_resize_256, postprocess_pipeline, CLASS_NAMES["model_1"],
#     display_name="SWIN1", contributor="haywoodsloan", model_path=MODEL_PATHS["model_1"],
#     architecture="SwinV2", dataset="TBA"
# )

# --- ONNX Quantized Model Example ---
ONNX_QUANTIZED_MODEL_PATH = "./models/model_1_quantized.onnx"

def preprocess_onnx_input(image: Image.Image):
    # Preprocess the image for the ONNX model (for SwinV2: 256x256, ImageNet-normalized)
    if image.mode != 'RGB':
        image = image.convert('RGB')
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet normalization
    ])
    input_tensor = transform(image)
    # ONNX expects a numpy array with a batch dimension: (1, C, H, W)
    return input_tensor.unsqueeze(0).cpu().numpy()

def infer_onnx_model(preprocessed_image_np):
    try:
        # Ensure the ONNX model exists before trying to load it
        if not os.path.exists(ONNX_QUANTIZED_MODEL_PATH):
            logger.error(f"ONNX quantized model not found at: {ONNX_QUANTIZED_MODEL_PATH}")
            raise FileNotFoundError(f"ONNX quantized model not found at: {ONNX_QUANTIZED_MODEL_PATH}")

        ort_session = onnxruntime.InferenceSession(ONNX_QUANTIZED_MODEL_PATH)
        ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
        ort_outputs = ort_session.run(None, ort_inputs)

        # The output is logits; apply softmax to get probabilities
        logits = ort_outputs[0]
        probabilities = softmax(logits[0])  # Remove the batch dim, then apply softmax
        return {"logits": logits, "probabilities": probabilities}
    except Exception as e:
        logger.error(f"Error during ONNX inference: {e}")
        # Return a structure consistent with other model errors
        return {"logits": np.array([]), "probabilities": np.array([])}

def postprocess_onnx_output(onnx_output, class_names):
    probabilities = onnx_output.get("probabilities")
    if probabilities is not None and len(probabilities) == len(class_names):
        return {class_names[i]: probabilities[i] for i in range(len(class_names))}
    else:
        logger.warning("ONNX post-processing failed or class names mismatch.")
        return {name: 0.0 for name in class_names}

# Register the ONNX quantized model
register_model_with_metadata(
    "model_1_onnx_quantized", infer_onnx_model, preprocess_onnx_input, postprocess_onnx_output,
    CLASS_NAMES["model_1"],  # Assuming it uses the same class names as model_1
    display_name="SWIN1", contributor="haywoodsloan", model_path=ONNX_QUANTIZED_MODEL_PATH,
    architecture="SwinV2", dataset="TBA"
)
# --- End ONNX Quantized Model Example ---
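# The quantized model file referenced above is assumed to be produced offline.
# A minimal sketch using onnxruntime's dynamic quantizer (the FP32 export path
# is a hypothetical placeholder, not this project's actual build step):
#
#   from onnxruntime.quantization import quantize_dynamic, QuantType
#   quantize_dynamic(
#       "./models/model_1.onnx",        # hypothetical FP32 ONNX export
#       ONNX_QUANTIZED_MODEL_PATH,      # INT8 model consumed by infer_onnx_model
#       weight_type=QuantType.QUInt8,   # dynamic weight-only quantization
#   )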
clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)
register_model_with_metadata(
    "model_2", clf_2, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_2"],
    display_name="VIT2", contributor="Heem2", model_path=MODEL_PATHS["model_2"],
    architecture="ViT", dataset="TBA"
)

# Feature extractors are device-agnostic at load time; inputs are moved to
# `device` inside the infer functions below.
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"])
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

def preprocess_256(image):
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize((256, 256))(image)

def postprocess_logits_model3(outputs, class_names):
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}

def model3_infer(image):
    inputs = feature_extractor_3(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_3(**inputs)
    return outputs

register_model_with_metadata(
    "model_3", model3_infer, preprocess_256, postprocess_logits_model3, CLASS_NAMES["model_3"],
    display_name="SDXL3", contributor="Organika", model_path=MODEL_PATHS["model_3"],
    architecture="VIT", dataset="SDXL"
)

feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"])
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)

def model4_infer(image):
    inputs = feature_extractor_4(image, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model_4(**inputs)
    return outputs

def postprocess_logits_model4(outputs, class_names):
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {class_names[i]: probabilities[i] for i in range(len(class_names))}

register_model_with_metadata(
    "model_4", model4_infer, preprocess_256, postprocess_logits_model4, CLASS_NAMES["model_4"],
    display_name="XLFLUX4", contributor="cmckinle", model_path=MODEL_PATHS["model_4"],
    architecture="VIT", dataset="SDXL, FLUX"
)

clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
register_model_with_metadata(
    "model_5", clf_5, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5"],
    display_name="VIT5", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5"],
    architecture="VIT", dataset="TBA"
)

image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)
register_model_with_metadata(
    "model_6", clf_6, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_6"],
    display_name="SWIN6", contributor="ideepankarsharma2003", model_path=MODEL_PATHS["model_6"],
    architecture="SWINv1", dataset="SDXL, Midjourney"
)

image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)
register_model_with_metadata(
    "model_7", clf_7, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_7"],
    display_name="VIT7", contributor="date3k2", model_path=MODEL_PATHS["model_7"],
    architecture="VIT", dataset="TBA"
)

# def postprocess_simple_prediction(result, class_names):
#     scores = {name: 0.0 for name in class_names}
#     fake_prob = result.get("Fake Probability")
#     if fake_prob is not None:
#         # Assume class_names = ["AI", "REAL"]
#         scores["AI"] = float(fake_prob)
#         scores["REAL"] = 1.0 - float(fake_prob)
#     return scores

# def simple_prediction(img):
#     client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
#     client.view_api()
#     print(type(img))
#     result = client.predict(
#         handle_file(img),
#         api_name="simple_predict"
#     )
#     return result

# register_model_with_metadata(
#     model_id="simple_prediction",
#     model=simple_prediction,
#     preprocess=None,
#     postprocess=postprocess_simple_prediction,
#     class_names=["AI", "REAL"],
#     display_name="Community Forensics",
#     contributor="Jeongsoo Park",
#     model_path="aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT",
#     architecture="ViT", dataset="GOAT"
# )

def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Predict using a specific model.

    Args:
        image (Image.Image): The input image to classify.
        model_id (str): The ID of the registered model to use for classification.
        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.

    Returns:
        dict: A dictionary containing the model details, classification scores, and label.
    """
    entry = MODEL_REGISTRY[model_id]
    img = entry.preprocess(image) if entry.preprocess else image
    try:
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)
        # Convention: class_names[0] is the AI/fake class, class_names[1] the real class
        ai_score = float(scores.get(entry.class_names[0], 0.0))
        real_score = float(scores.get(entry.class_names[1], 0.0))
        label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": ai_score,
            "Real Score": real_score,
            "Label": label
        }
    except Exception as e:
        return {
            "Model": entry.display_name,
            "Contributor": entry.contributor,
            "HF Model Path": entry.model_path,
            "AI Score": 0.0,
            "Real Score": 0.0,
            "Label": f"Error: {str(e)}"
        }
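# Example usage of infer() (hypothetical file name and scores; any RGB image works):
#
#   img = Image.open("sample.jpg")
#   res = infer(img, "model_2", confidence_threshold=0.75)
#   # -> {"Model": "VIT2", "Contributor": "Heem2",
#   #     "HF Model Path": "Heem2/AI-vs-Real-Image-Detection",
#   #     "AI Score": 0.97, "Real Score": 0.03, "Label": "AI"}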
def full_prediction(img, confidence_threshold, rotate_degrees, noise_level, sharpen_strength):
    """Full prediction run, with a team of ensembles and agents.

    Args:
        img (str | Image.Image | np.ndarray): The input image to classify (file path/URL, PIL image, or numpy array).
        confidence_threshold (float): The confidence threshold for classification, e.g. 0.75.
        rotate_degrees (int, optional): The degrees to rotate the image.
        noise_level (int, optional): The noise level to use.
        sharpen_strength (int, optional): The sharpen strength to use.

    Yields:
        tuple: Partial results (the per-model score table) as each model finishes,
            followed by the final consensus outputs.
    """
    # Ensure img is a PIL Image object
    if img is None:
        raise gr.Error("No image provided. Please upload an image to analyze.")

    # Handle filepath conversion if needed
    if isinstance(img, str):
        try:
            img = load_image(img)
        except Exception as e:
            logger.error(f"Error loading image from path: {e}")
            raise gr.Error(f"Could not load image from the provided path. Error: {str(e)}")

    if not isinstance(img, Image.Image):
        try:
            img = Image.fromarray(img)
        except Exception as e:
            logger.error(f"Error converting input image to PIL: {e}")
            raise gr.Error("Input image could not be converted to a valid image format. Please try another image.")

    # Ensure the image is in RGB format for consistent processing
    if img.mode != 'RGB':
        img = img.convert('RGB')

    monitor_agent = EnsembleMonitorAgent()
    weight_manager = ModelWeightManager(strongest_model_id="simple_prediction")
    optimization_agent = WeightOptimizationAgent(weight_manager)
    health_agent = SystemHealthAgent()
    context_agent = ContextualIntelligenceAgent()
    anomaly_agent = ForensicAnomalyDetectionAgent()

    health_agent.monitor_system_health()

    if rotate_degrees or noise_level or sharpen_strength:
        img_pil, _ = augment_image(img, ["rotate", "add_noise", "sharpen"], rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_np_og = np.array(img)
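    # NOTE: the detector models below consume the augmented image (img_pil),
    # while the forensic passes at the end of this function operate on the
    # unaugmented original (img_np_og).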
    model_predictions_raw = {}
    confidence_scores = {}
    results = []
    table_rows = []

    # Stream results as each model finishes
    for model_id in MODEL_REGISTRY:
        model_start = time.time()
        result = infer(img_pil, model_id, confidence_threshold)
        model_end = time.time()

        monitor_agent.monitor_prediction(
            model_id,
            result["Label"],
            max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)),
            model_end - model_start
        )

        model_predictions_raw[model_id] = result
        confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0))
        results.append(result)
        table_rows.append([
            result.get("Model", ""),
            result.get("Contributor", ""),
            round(result.get("AI Score", 0.0), 3) if result.get("AI Score") is not None else 0.0,
            round(result.get("Real Score", 0.0), 3) if result.get("Real Score") is not None else 0.0,
            result.get("Label", "Error")
        ])

        # Yield partial results: only update the table; the other outputs stay None
        yield None, None, table_rows, None, None

    # After all models have run, compute the weighted consensus
    image_data_for_context = {
        "width": img.width,
        "height": img.height,
        "mode": img.mode,
    }
    detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw)
    logger.info(f"Detected context tags: {detected_context_tags}")

    adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags)

    weighted_predictions = {"AI": 0.0, "REAL": 0.0, "UNCERTAIN": 0.0}
    for model_id, prediction in model_predictions_raw.items():
        prediction_label = prediction.get("Label")
        if prediction_label in weighted_predictions:
            weighted_predictions[prediction_label] += adjusted_weights[model_id]
        else:
            logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.")
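    # Worked example (hypothetical numbers): if adjusted_weights were
    # {"m1": 0.5, "m2": 0.3, "m3": 0.2} and the labels were AI, AI, REAL,
    # weighted_predictions would be {"AI": 0.8, "REAL": 0.2, "UNCERTAIN": 0.0},
    # so the consensus below resolves to "AI".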
Skipping its weight in consensus.") final_prediction_label = "UNCERTAIN" if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]: final_prediction_label = "AI" elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]: final_prediction_label = "REAL" optimization_agent.analyze_performance(final_prediction_label, None) gradient_image = gradient_processing(img_np_og) gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True) minmax_image = minmax_process(img_np_og) minmax_image2 = minmax_process(img_np_og, radius=6) # bitplane_image = bit_plane_extractor(img_pil) ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True) ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True) ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False) forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2] forensic_output_descriptions = [ f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}", "ELA analysis (Pass 1): Grayscale error map, quality 75.", "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.", "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.", "Gradient processing: Highlights edges and transitions.", "Gradient processing: Int=45, Equalize=True", "MinMax processing: Deviations in local pixel values.", "MinMax processing (Radius=6): Deviations in local pixel values.", # "Bit Plane extractor: Visualization of individual bit planes from different color channels." ] anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions) logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}") consensus_html = f"