import os
import time
from typing import Literal
import spaces
import gradio as gr
from gradio_client import Client, handle_file
from transformers import (
    pipeline,
    AutoImageProcessor,
    SwinForImageClassification,
    Swinv2ForImageClassification,
    AutoFeatureExtractor,
    AutoModelForImageClassification,
)
from torchvision import transforms
import torch
from PIL import Image
import numpy as np
import io
import logging
from utils.utils import softmax, augment_image, convert_pil_to_bytes
from forensics.gradient import gradient_processing
from forensics.minmax import minmax_process
from forensics.ela import ELA
from forensics.wavelet import wavelet_blocking_noise_estimation
from forensics.bitplane import bit_plane_extractor
from utils.hf_logger import log_inference_data
from utils.text_content import QUICK_INTRO, IMPLEMENTATION
from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
from agents.ensemble_weights import ModelWeightManager
from dotenv import load_dotenv
import json
from huggingface_hub import CommitScheduler

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): this is assigned after `transformers` / `huggingface_hub` have
# already been imported above. Those libraries appear to resolve their cache
# location at import time, so this assignment may have no effect on where
# models are stored -- confirm, and move it above the imports if it is needed.
os.environ['HF_HUB_CACHE'] = './models'

LOCAL_LOG_DIR = "./hf_inference_logs"
HF_DATASET_NAME = "degentic_rd0"
load_dotenv()


# Custom JSON Encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values into native Python values.

    NumPy float, integer and boolean scalars are converted with ``.item()``
    and arrays with ``.tolist()``. Every other unsupported object is passed
    to ``json.JSONEncoder.default``, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Raises:
            TypeError: if *obj* is not a supported NumPy value (raised by the
                base class implementation).
        """
        if isinstance(obj, (np.floating, np.integer, np.bool_)):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


# Ensure using GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

header_style = {
    "textAlign": 'center',
    "color": '#fff',
    "height": 64,
    "paddingInline": 48,
    "lineHeight": '64px',
    "backgroundColor": '#4096ff',
}

content_style = {
    "textAlign": 'center',
    "minHeight": 120,
    "lineHeight": '120px',
    "color": '#fff',
    "backgroundColor": '#0958d9',
}
sider_style = {
    "textAlign": 'center',
    "lineHeight": '120px',
    "color": '#fff',
    "backgroundColor": '#1677ff',
}

footer_style = {
    "textAlign": 'center',
    "color": '#fff',
    "backgroundColor": '#4096ff',
}

layout_style = {
    "borderRadius": 8,
    "overflow": 'hidden',
    "width": 'calc(100% - 8px)',
    "maxWidth": 'calc(100% - 8px)',
}

# Model paths and class names
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4",
}

# `infer` reads index 0 as the "AI" class and index 1 as the "real" class.
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    # NOTE(review): 'Realism' at index 0 looks inverted relative to the other
    # entries (AI-like label first) -- confirm against the model card.
    "model_5": ['Realism', 'Deepfake'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}


def _to_rgb_resized(image, size):
    """Convert *image* to RGB when needed and resize it to *size*.

    Args:
        image: object exposing ``mode`` and ``convert`` (a PIL image in this
            file's callers).
        size: ``(height, width)`` tuple handed to ``transforms.Resize``.

    Returns:
        The result of applying ``transforms.Resize(size)`` to the RGB image.
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')
    return transforms.Resize(size)(image)


def preprocess_resize_224(image):
    """Return *image* as RGB resized to 224x224."""
    return _to_rgb_resized(image, (224, 224))


def preprocess_256(image):
    """Return *image* as RGB resized to 256x256."""
    return _to_rgb_resized(image, (256, 256))


def postprocess_pipeline(prediction, class_names):
    """Map each pipeline prediction's ``label`` to its ``score``.

    Args:
        prediction: iterable of dicts with ``'label'`` and ``'score'`` keys
            (assumes HuggingFace pipeline output).
        class_names: unused; kept so every postprocess function shares the
            same signature.

    Returns:
        ``{label: score}`` for every item in *prediction*.
    """
    return {pred['label']: pred['score'] for pred in prediction}


def postprocess_logits(outputs, class_names):
    """Turn raw model logits into a ``{class_name: probability}`` dict.

    Args:
        outputs: object with a ``logits`` tensor; only the first item along
            the leading axis is used.
        class_names: names assigned to the logits by position.

    Returns:
        Dict mapping each class name to the matching entry of
        ``softmax(logits)``.
    """
    logits = outputs.logits.cpu().numpy()[0]
    probabilities = softmax(logits)
    return {name: probabilities[i] for i, name in enumerate(class_names)}


# Expand ModelEntry to include metadata
# (Assume ModelEntry is updated in registry.py to accept display_name, contributor, model_path)
# If not, we will update registry.py accordingly after this.
def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path):
    """Create a ``ModelEntry`` with display metadata and store it in ``MODEL_REGISTRY``.

    Args:
        model_id: key under which the entry is stored in ``MODEL_REGISTRY``.
        model: callable applied to the preprocessed image.
        preprocess: callable applied to the input image before inference.
        postprocess: callable ``(model_output, class_names)`` producing scores.
        class_names: class labels passed to *postprocess*.
        display_name: human-readable model name attached to the entry.
        contributor: contributor name attached to the entry.
        model_path: model path string attached to the entry.
    """
    entry = ModelEntry(model, preprocess, postprocess, class_names)
    # The metadata is attached as plain attributes after construction.
    entry.display_name = display_name
    entry.contributor = contributor
    entry.model_path = model_path
    MODEL_REGISTRY[model_id] = entry


def _make_logits_infer(feature_extractor, model):
    """Build an inference callable that returns the raw output of *model*.

    The returned function runs *feature_extractor* on the image, moves the
    resulting inputs to ``device`` and calls *model* under ``torch.no_grad()``.
    """
    def _infer(image):
        inputs = feature_extractor(image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs

    return _infer


# Load and register models. Registration order is kept as-is because the
# registry is iterated elsewhere in this file.
image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)
register_model_with_metadata(
    "model_1", clf_1, preprocess_256, postprocess_pipeline, CLASS_NAMES["model_1"],
    display_name="SwinV2 Based", contributor="haywoodsloan", model_path=MODEL_PATHS["model_1"]
)

clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)
register_model_with_metadata(
    "model_2", clf_2, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_2"],
    display_name="ViT Based", contributor="Heem2", model_path=MODEL_PATHS["model_2"]
)

# Register remaining models
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

# Same behaviour as the module-level `postprocess_logits`; the per-model name
# is kept for backward compatibility.
postprocess_logits_model3 = postprocess_logits
model3_infer = _make_logits_infer(feature_extractor_3, model_3)

register_model_with_metadata(
    "model_3", model3_infer, preprocess_256, postprocess_logits_model3, CLASS_NAMES["model_3"],
    display_name="SDXL Dataset", contributor="Organika", model_path=MODEL_PATHS["model_3"]
)

feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"], device=device)
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)

model4_infer = _make_logits_infer(feature_extractor_4, model_4)
postprocess_logits_model4 = postprocess_logits

register_model_with_metadata(
    "model_4", model4_infer, preprocess_256, postprocess_logits_model4, CLASS_NAMES["model_4"],
    display_name="SDXL + FLUX", contributor="cmckinle", model_path=MODEL_PATHS["model_4"]
)

clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
register_model_with_metadata(
    "model_5", clf_5, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5"],
    display_name="Vit Based", contributor="prithivMLmods", model_path=MODEL_PATHS["model_5"]
)

image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)
register_model_with_metadata(
    "model_6", clf_6, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_6"],
    display_name="Swin, Midj + SDXL", contributor="ideepankarsharma2003", model_path=MODEL_PATHS["model_6"]
)

image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)
# NOTE(review): contributor "temp" looks like a placeholder -- confirm the intended value.
register_model_with_metadata(
    "model_7", clf_7, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_7"],
    display_name="ViT", contributor="temp", model_path=MODEL_PATHS["model_7"]
)
# Generic inference function
def infer(image: "Image.Image", model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Run one registered model on *image* and return a flat result row.

    Args:
        image: input image handed to the entry's ``preprocess`` callable.
        model_id: key of the model in ``MODEL_REGISTRY``.
        confidence_threshold: minimum score for a class to be chosen as the
            label.

    Returns:
        Dict with keys ``"Model"``, ``"Contributor"``, ``"HF Model Path"``,
        ``"AI Score"``, ``"Real Score"`` and ``"Label"``. ``"Label"`` is
        ``"AI"``, ``"REAL"`` or ``"UNCERTAIN"``. If preprocessing, inference
        or postprocessing raises, both scores are ``0.0`` and the label is
        ``"Error: <message>"``.

    Raises:
        KeyError: if *model_id* is not in ``MODEL_REGISTRY``.
    """
    entry = MODEL_REGISTRY[model_id]
    row = {
        "Model": entry.display_name,
        "Contributor": entry.contributor,
        "HF Model Path": entry.model_path,
    }
    try:
        # Preprocessing sits inside the try so a bad image is reported as an
        # error row, exactly like a model or postprocess failure.
        img = entry.preprocess(image)
        result = entry.model(img)
        scores = entry.postprocess(result, entry.class_names)
        # Flatten output for Dataframe: index 0 is read as the AI class,
        # index 1 as the real class.
        ai_score = float(scores.get(entry.class_names[0], 0.0))
        real_score = float(scores.get(entry.class_names[1], 0.0))
    except Exception as e:
        # One failing model must not abort the remaining models.
        logger.exception("Inference failed for model %s", model_id)
        row["AI Score"] = 0.0  # Ensure it's a float even on error
        row["Real Score"] = 0.0  # Ensure it's a float even on error
        row["Label"] = f"Error: {str(e)}"
        return row

    if ai_score >= confidence_threshold:
        label = "AI"
    elif real_score >= confidence_threshold:
        label = "REAL"
    else:
        label = "UNCERTAIN"

    row["AI Score"] = ai_score
    row["Real Score"] = real_score
    row["Label"] = label
    return row


# Update predict_image to use all registered models in order
def predict_image(img, confidence_threshold):
    """Run ``infer`` for ``model_1`` .. ``model_7`` and return ``(img, results)``.

    Args:
        img: image forwarded unchanged to ``infer`` and returned as-is.
        confidence_threshold: threshold forwarded to ``infer``.

    Returns:
        Tuple of the input image and the list of per-model result dicts, in
        model-id order.
    """
    model_ids = [
        "model_1", "model_2", "model_3", "model_4",
        "model_5", "model_6", "model_7",
    ]
    results = [infer(img, model_id, confidence_threshold) for model_id in model_ids]
    return img, results


def _iter_labels(results):
    """Yield the label of each row that carries one.

    Dict rows contribute their ``"Label"`` value; sequence rows contribute
    the element at index 4 when they have more than four elements.
    """
    for row in results:
        if isinstance(row, dict):
            if "Label" in row:
                yield row["Label"]
        elif len(row) > 4:
            yield row[4]


def get_consensus_label(results):
    """Return the most frequent label among *results* as a string.

    Args:
        results: iterable of rows, either sequences whose fifth element is
            the label or dicts with a ``"Label"`` key (as built by ``infer``).

    Returns:
        The most common label, or ``"No results"`` when no row carries one.
        Ties go to the label that appears first in *results*.
    """
    from collections import Counter

    counts = Counter(_iter_labels(results))
    if not counts:
        return "No results"
    consensus = counts.most_common(1)[0][0]
    return f"{consensus}"
logger.error(f"Error converting input image to PIL: {e}") # If conversion fails, it's a critical error for the whole process raise ValueError("Input image could not be converted to PIL Image.") # Initialize agents monitor_agent = EnsembleMonitorAgent() weight_manager = ModelWeightManager() optimization_agent = WeightOptimizationAgent(weight_manager) health_agent = SystemHealthAgent() # New smart agents context_agent = ContextualIntelligenceAgent() anomaly_agent = ForensicAnomalyDetectionAgent() # Monitor system health health_agent.monitor_system_health() if augment_methods: img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength) else: img_pil = img img_np_og = np.array(img) # Convert PIL Image to NumPy array # 1. Get initial predictions from all models model_predictions_raw = {} confidence_scores = {} results = [] # To store the results for the DataFrame for model_id in MODEL_REGISTRY: model_start = time.time() result = infer(img_pil, model_id, confidence_threshold) model_end = time.time() # Monitor individual model performance monitor_agent.monitor_prediction( model_id, result["Label"], max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)), model_end - model_start ) model_predictions_raw[model_id] = result # Store the full result dictionary confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)) results.append(result) # Add individual model result to the list # 2. Infer context tags using ContextualIntelligenceAgent image_data_for_context = { "width": img.width, "height": img.height, "mode": img.mode, # Add more features like EXIF data if exif_full_dump is used } detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw) logger.info(f"Detected context tags: {detected_context_tags}") # 3. 
Get adjusted weights, passing context tags adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags) # 4. Optimize weights if needed # `final_prediction_label` is determined AFTER weighted consensus, so analyze_performance will be called later # 5. Calculate weighted consensus weighted_predictions = { "AI": 0.0, "REAL": 0.0, "UNCERTAIN": 0.0 } for model_id, prediction in model_predictions_raw.items(): # Use raw predictions for weighting # Ensure the prediction label is valid for weighted_predictions prediction_label = prediction.get("Label") # Extract the label if prediction_label in weighted_predictions: weighted_predictions[prediction_label] += adjusted_weights[model_id] else: # Handle cases where prediction might be an error or unexpected label logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.") final_prediction_label = "UNCERTAIN" if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]: final_prediction_label = "AI" elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]: final_prediction_label = "REAL" # Call analyze_performance after final_prediction_label is known optimization_agent.analyze_performance(final_prediction_label, None) # 6. 
Perform forensic processing gradient_image = gradient_processing(img_np_og) # Added gradient processing gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True) # Added gradient processing minmax_image = minmax_process(img_np_og) # Added MinMax processing minmax_image2 = minmax_process(img_np_og, radius=6) # Added MinMax processing # bitplane_image = bit_plane_extractor(img_pil) # First pass - standard analysis ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True) # Second pass - enhanced visibility ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True) ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False) forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2] # 7. Generate boilerplate descriptions for forensic outputs for anomaly agent forensic_output_descriptions = [ f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}", "ELA analysis (Pass 1): Grayscale error map, quality 75.", "ELA analysis (Pass 2): Grayscale error map, quality 75, enhanced contrast.", "ELA analysis (Pass 3): Color error map, quality 75, enhanced contrast.", "Gradient processing: Highlights edges and transitions.", "Gradient processing: Int=45, Equalize=True", "MinMax processing: Deviations in local pixel values.", "MinMax processing (Radius=6): Deviations in local pixel values.", ] # You could also add descriptions for Wavelet and Bit Plane if they were dynamic outputs # For instance, if wavelet_blocking_noise_estimation had parameters that changed and you wanted to describe them. # 8. 
Analyze forensic outputs for anomalies using ForensicAnomalyDetectionAgent anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions) logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}") # Prepare table rows for Dataframe (exclude model path) table_rows = [[ r.get("Model", ""), r.get("Contributor", ""), r.get("AI Score", 0.0) if r.get("AI Score") is not None else 0.0, r.get("Real Score", 0.0) if r.get("Real Score") is not None else 0.0, r.get("Label", "Error") ] for r in results] logger.info(f"Type of table_rows: {type(table_rows)}") for i, row in enumerate(table_rows): logger.info(f"Row {i} types: {[type(item) for item in row]}") # The get_consensus_label function is now replaced by final_prediction_label from weighted consensus consensus_html = f"