import gradio as gr import torch from transformers import AutoFeatureExtractor, AutoModelForImageClassification, pipeline import os import zipfile import shutil import matplotlib.pyplot as plt from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report, roc_curve, auc, ConfusionMatrixDisplay from PIL import Image import tempfile import numpy as np import urllib.request import base64 from io import BytesIO import logging from tqdm import tqdm # Set up logging logging.basicConfig(filename='app.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') MODEL_NAME = "cmckinle/sdxl-flux-detector" LABELS = ["AI", "Real"] class AIDetector: def __init__(self): self.pipe = pipeline("image-classification", MODEL_NAME) self.feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME) self.model = AutoModelForImageClassification.from_pretrained(MODEL_NAME) @staticmethod def softmax(vector): e = np.exp(vector - np.max(vector)) return e / e.sum() def predict(self, image): inputs = self.feature_extractor(image, return_tensors="pt") with torch.no_grad(): outputs = self.model(**inputs) logits = outputs.logits probabilities = self.softmax(logits.numpy()) prediction = logits.argmax(-1).item() label = LABELS[prediction] results = {label: float(prob) for label, prob in zip(LABELS, probabilities[0])} return label, results def custom_upload_handler(file): try: logging.info(f"Starting upload of file: {file.name}") file_size = os.path.getsize(file.name) logging.info(f"File size: {file_size} bytes") # Read and process the file in chunks chunk_size = 1024 * 1024 # 1MB chunks total_chunks = file_size // chunk_size + (1 if file_size % chunk_size > 0 else 0) with open(file.name, 'rb') as f: for chunk in tqdm(range(total_chunks), desc="Uploading"): data = f.read(chunk_size) if not data: break logging.debug(f"Processed chunk {chunk+1} of {total_chunks}") logging.info("File upload completed successfully") return file except Exception as e: logging.error(f"Error during file upload: {str(e)}") raise gr.Error(f"Upload failed: {str(e)}") def process_zip(zip_file): temp_dir = tempfile.mkdtemp() try: logging.info(f"Starting to process zip file: {zip_file.name}") # Validate zip structure with zipfile.ZipFile(zip_file.name, 'r') as z: file_list = z.namelist() if not ('real/' in file_list and 'ai/' in file_list): raise ValueError("Zip file must contain 'real' and 'ai' folders") z.extractall(temp_dir) labels, preds, images = [], [], [] false_positives, false_negatives = [], [] detector = AIDetector() total_images = sum(len(files) for _, _, files in os.walk(temp_dir)) processed_images = 0 for folder_name, ground_truth_label in [('real', 1), ('ai', 0)]: folder_path = os.path.join(temp_dir, folder_name) if not os.path.exists(folder_path): raise ValueError(f"Folder not found: {folder_path}") for img_name in os.listdir(folder_path): img_path = os.path.join(folder_path, img_name) try: with Image.open(img_path).convert("RGB") as img: _, prediction = detector.predict(img) pred_label = 0 if prediction["AI"] > prediction["Real"] else 1 preds.append(pred_label) labels.append(ground_truth_label) images.append(img_name) # Collect false positives and false negatives with image data if pred_label != ground_truth_label: with open(img_path, "rb") as img_file: img_data = base64.b64encode(img_file.read()).decode() if pred_label == 1 and ground_truth_label == 0: false_positives.append((img_name, img_data)) elif pred_label == 0 and ground_truth_label == 1: false_negatives.append((img_name, img_data)) except Exception as e: logging.error(f"Error processing image {img_name}: {e}") processed_images += 1 gr.Progress(processed_images / total_images) logging.info("Zip file processing completed successfully") return evaluate_model(labels, preds, false_positives, false_negatives) except Exception as e: logging.error(f"Error processing zip file: {str(e)}") raise gr.Error(f"Error processing zip file: {str(e)}") finally: shutil.rmtree(temp_dir) def format_classification_report(labels, preds): # Convert the report string to a dictionary report_dict = classification_report(labels, preds, output_dict=True) # Create an HTML table with updated CSS html = """
Class | Precision | Recall | F1-Score | Support |
---|---|---|---|---|
{class_name} | {report_dict[class_name]['precision']:.2f} | {report_dict[class_name]['recall']:.2f} | {report_dict[class_name]['f1-score']:.2f} | {report_dict[class_name]['support']} |
Accuracy | {report_dict['accuracy']:.2f} | {report_dict['macro avg']['support']} | ||
Macro Avg | {report_dict['macro avg']['precision']:.2f} | {report_dict['macro avg']['recall']:.2f} | {report_dict['macro avg']['f1-score']:.2f} | {report_dict['macro avg']['support']} |
Weighted Avg | {report_dict['weighted avg']['precision']:.2f} | {report_dict['weighted avg']['recall']:.2f} | {report_dict['weighted avg']['f1-score']:.2f} | {report_dict['weighted avg']['support']} |
{img_name}
{img_name}