import spaces import gradio as gr from transformers import pipeline, AutoImageProcessor, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification from torchvision import transforms import torch from PIL import Image import numpy as np # from utils.goat import call_inference / announcement soon import io import warnings # Suppress warnings warnings.filterwarnings("ignore", category=UserWarning, message="Using a slow image processor as `use_fast` is unset") # Ensure using GPU if available device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load the first model and processor image_processor_1 = AutoImageProcessor.from_pretrained("haywoodsloan/ai-image-detector-deploy", use_fast=True) model_1 = Swinv2ForImageClassification.from_pretrained("haywoodsloan/ai-image-detector-deploy") model_1 = model_1.to(device) clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device) # Load the second model model_2_path = "Heem2/AI-vs-Real-Image-Detection" clf_2 = pipeline("image-classification", model=model_2_path, device=device) # Load additional models models = ["Organika/sdxl-detector", "cmckinle/sdxl-flux-detector"] feature_extractor_3 = AutoFeatureExtractor.from_pretrained(models[0], device=device) model_3 = AutoModelForImageClassification.from_pretrained(models[0]).to(device) feature_extractor_4 = AutoFeatureExtractor.from_pretrained(models[1], device=device) model_4 = AutoModelForImageClassification.from_pretrained(models[1]).to(device) # Load the second model model_5_path = "prithivMLmods/Deep-Fake-Detector-v2-Model" clf_5 = pipeline("image-classification", model=model_5_path, device=device) # Define class names for all models class_names_1 = ['artificial', 'real'] class_names_2 = ['AI Image', 'Real Image'] labels_3 = ['AI', 'Real'] labels_4 = ['AI', 'Real'] class_names_5 = ['Realism', 'Deepfake'] def softmax(vector): e = np.exp(vector - np.max(vector)) # for numerical stability return e / e.sum() def augment_image(img_pil): # Example augmentation: horizontal flip transform_flip = transforms.Compose([ transforms.RandomHorizontalFlip(p=1.0) # Flip the image horizontally with probability 1.0 ]) # Example augmentation: rotation transform_rotate = transforms.Compose([ transforms.RandomRotation(degrees=(90, 90)) # Rotate the image by 90 degrees ]) augmented_img_flip = transform_flip(img_pil) augmented_img_rotate = transform_rotate(img_pil) return augmented_img_flip, augmented_img_rotate # def convert_pil_to_bytes(img_pil): # img_byte_arr = io.BytesIO() # img_pil.save(img_byte_arr, format='PNG') # img_byte_arr = img_byte_arr.getvalue() # return img_byte_arr def convert_pil_to_bytes(image, format='JPEG'): img_byte_arr = io.BytesIO() image.save(img_byte_arr, format=format) img_byte_arr = img_byte_arr.getvalue() return img_byte_arr @spaces.GPU(duration=10) def predict_image(img, confidence_threshold): # Ensure the image is a PIL Image if not isinstance(img, Image.Image): raise ValueError(f"Expected a PIL Image, but got {type(img)}") # Convert the image to RGB if not already if img.mode != 'RGB': img_pil = img.convert('RGB') else: img_pil = img # Resize the image img_pil = transforms.Resize((256, 256))(img_pil) # Size 224 for vits models img_pilvits = transforms.Resize((224, 224))(img_pil) # Predict using the first model try: prediction_1 = clf_1(img_pil) result_1 = {pred['label']: pred['score'] for pred in prediction_1} result_1output = [1, 'SwinV2-base', result_1['real'], result_1['artificial']] print(result_1output) # Ensure the result dictionary contains all class names for class_name in class_names_1: if class_name not in result_1: result_1[class_name] = 0.0 # Check if either class meets the confidence threshold if result_1['artificial'] >= confidence_threshold: label_1 = f"AI, Confidence: {result_1['artificial']:.4f}" result_1output += ['AI'] elif result_1['real'] >= confidence_threshold: label_1 = f"Real, Confidence: {result_1['real']:.4f}" result_1output += ['REAL'] else: label_1 = "Uncertain Classification" result_1output += ['UNCERTAIN'] except Exception as e: label_1 = f"Error: {str(e)}" print(result_1output) # Predict using the second model try: prediction_2 = clf_2(img_pilvits) result_2 = {pred['label']: pred['score'] for pred in prediction_2} result_2output = [2, 'ViT-base Classifer', result_2['Real Image'], result_2['AI Image']] print(result_2output) # Ensure the result dictionary contains all class names for class_name in class_names_2: if class_name not in result_2: result_2[class_name] = 0.0 # Check if either class meets the confidence threshold if result_2['AI Image'] >= confidence_threshold: label_2 = f"AI, Confidence: {result_2['AI Image']:.4f}" result_2output += ['AI'] elif result_2['Real Image'] >= confidence_threshold: label_2 = f"Real, Confidence: {result_2['Real Image']:.4f}" result_2output += ['REAL'] else: label_2 = "Uncertain Classification" result_2output += ['UNCERTAIN'] except Exception as e: label_2 = f"Error: {str(e)}" # Predict using the third model with softmax try: inputs_3 = feature_extractor_3(img_pil, return_tensors="pt").to(device) with torch.no_grad(): outputs_3 = model_3(**inputs_3) logits_3 = outputs_3.logits probabilities_3 = softmax(logits_3.cpu().numpy()[0]) result_3 = { labels_3[1]: float(probabilities_3[1]), # Real labels_3[0]: float(probabilities_3[0]) # AI } result_3output = [3, 'SDXL-Trained', float(probabilities_3[1]), float(probabilities_3[0])] print(result_3output) # Ensure the result dictionary contains all class names for class_name in labels_3: if class_name not in result_3: result_3[class_name] = 0.0 # Check if either class meets the confidence threshold if result_3['AI'] >= confidence_threshold: label_3 = f"AI, Confidence: {result_3['AI']:.4f}" result_3output += ['AI'] elif result_3['Real'] >= confidence_threshold: label_3 = f"Real, Confidence: {result_3['Real']:.4f}" result_3output += ['REAL'] else: label_3 = "Uncertain Classification" result_3output += ['UNCERTAIN'] except Exception as e: label_3 = f"Error: {str(e)}" # Predict using the fourth model with softmax try: inputs_4 = feature_extractor_4(img_pil, return_tensors="pt").to(device) with torch.no_grad(): outputs_4 = model_4(**inputs_4) logits_4 = outputs_4.logits probabilities_4 = softmax(logits_4.cpu().numpy()[0]) result_4 = { labels_4[1]: float(probabilities_4[1]), # Real labels_4[0]: float(probabilities_4[0]) # AI } result_4output = [4, 'SDXL + FLUX', float(probabilities_4[1]), float(probabilities_4[0])] print(result_4) # Ensure the result dictionary contains all class names for class_name in labels_4: if class_name not in result_4: result_4[class_name] = 0.0 # Check if either class meets the confidence threshold if result_4['AI'] >= confidence_threshold: label_4 = f"AI, Confidence: {result_4['AI']:.4f}" result_4output += ['AI'] elif result_4['Real'] >= confidence_threshold: label_4 = f"Real, Confidence: {result_4['Real']:.4f}" result_4output += ['REAL'] else: label_4 = "Uncertain Classification" result_4output += ['UNCERTAIN'] except Exception as e: label_4 = f"Error: {str(e)}" try: prediction_5 = clf_5(img_pilvits) result_5 = {pred['label']: pred['score'] for pred in prediction_5} result_5output = [5, 'ViT-base Newcomer', result_5['Realism'], result_5['Deepfake']] print(result_5output) # Ensure the result dictionary contains all class names for class_name in class_names_5: if class_name not in result_5: result_5[class_name] = 0.0 # Check if either class meets the confidence threshold if result_5['AI Image'] >= confidence_threshold: label_5 = f"AI, Confidence: {result_5['Deepfake']:.4f}" result_5output += ['AI'] elif result_5['Real Image'] >= confidence_threshold: label_5 = f"Real, Confidence: {result_5['Realism']:.4f}" result_5output += ['REAL'] else: label_5 = "Uncertain Classification" result_5output += ['UNCERTAIN'] except Exception as e: label_5 = f"Error: {str(e)}" # try: # result_5output = [5, 'TBA', 0.0, 0.0, 'MAINTENANCE'] # img_bytes = convert_pil_to_bytes(img_pil) # # print(img) # # print(img_bytes) # response5_raw = call_inference(img) # print(response5_raw) # response5 = response5_raw # print(response5) # label_5 = f"Result: {response5}" # except Exception as e: # label_5 = f"Error: {str(e)}" # Combine results combined_results = { "SwinV2/detect": label_1, "ViT/AI-vs-Real": label_2, "Swin/SDXL": label_3, "Swin/SDXL-FLUX": label_4, "prithivMLmods": label_5 } # Generate HTML content combined_outputs = [ result_1output, result_2output, result_3output, result_4output, result_5output ] # html_content = generate_results_html(combined_outputs) return img_pil, combined_outputs # Define a function to generate the HTML content # Define a function to generate the HTML content def generate_results_html(results): def get_header_color(label): if label == 'AI': return 'bg-red-500 text-red-700', 'bg-red-400', 'bg-red-100', 'bg-red-700 text-red-700', 'bg-red-200' elif label == 'REAL': return 'bg-green-500 text-green-700', 'bg-green-400', 'bg-green-100', 'bg-green-700 text-green-700', 'bg-green-200' elif label == 'UNCERTAIN': return 'bg-yellow-500 text-yellow-700 bg-yellow-100', 'bg-yellow-400', 'bg-yellow-100', 'bg-yellow-700 text-yellow-700', 'bg-yellow-200' elif label == 'MAINTENANCE': return 'bg-blue-500 text-blue-700', 'bg-blue-400', 'bg-blue-100', 'bg-blue-700 text-blue-700', 'bg-blue-200' else: return 'bg-gray-300 text-gray-700', 'bg-gray-400', 'bg-gray-100', 'bg-gray-700 text-gray-700', 'bg-gray-200' html_content = f"""
{results[0][-1]}
Conf: {results[0][2]:.4f}
Conf: {results[0][3]:.4f}
{results[1][-1]}
Conf: {results[1][2]:.4f}
Conf: {results[1][3]:.4f}
{results[2][-1]}
Conf: {results[2][2]:.4f}
Conf: {results[2][3]:.4f}
{results[3][-1]}
Conf: {results[3][2]:.4f}
Conf: {results[3][3]:.4f}
{results[4][-1]}
Conf: {results[4][2]:.4f}
Conf: {results[4][3]:.4f}