# fix(image processing): ensure image consistency for gradient and minmax preprocessors (commit 322ab55)
import spaces
import gradio as gr
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
from torchvision import transforms
import torch
from PIL import Image
import numpy as np
import io
import logging

from utils.utils import softmax, augment_image, convert_pil_to_bytes, ELA
from utils.gradient import gradient_processing
from utils.minmax import preprocess as minmax_preprocess

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Ensure using GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Model paths and class names
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4"
}

CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Realism', 'Deepfake'],
    "model_5b": ['Real', 'Deepfake'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}
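# NOTE: predict_with_model below treats class_names[0] as the "AI/fake" label and
# class_names[1] as the "real" label when thresholding, so the order of each list
# above is significant.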
# Load models and processors
def load_models():
    image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
    model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"])
    model_1 = model_1.to(device)
    clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)

    clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)

    # Feature extractors are device-agnostic; their tensors are moved to `device` at call time.
    feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"])
    model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

    feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"])
    model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)

    clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
    clf_5b = pipeline("image-classification", model=MODEL_PATHS["model_5b"], device=device)

    image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
    model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
    clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)

    image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
    model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
    clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)

    return clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, model_7, clf_7

clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, model_7, clf_7 = load_models()
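# All eight checkpoints are loaded eagerly at import time and stay resident on `device`
# for the lifetime of the Space, so every prediction call below reuses the same
# pipeline/model instances.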
def predict_with_model(img_pil, clf, class_names, confidence_threshold, model_name, model_id, feature_extractor=None):
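    """Run a single detector on `img_pil` and threshold its scores.

    `clf` is either an image-classification pipeline or, when `feature_extractor`
    is given, a bare model whose inputs are built with that extractor. Returns a
    human-readable label string plus a result list shaped as
    [model_id, model_name, real_score, ai_score, 'AI' | 'REAL' | 'UNCERTAIN' | 'ERROR'].
    """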
    try:
        if feature_extractor:
            inputs = feature_extractor(img_pil, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = clf(**inputs)
            logits = outputs.logits
            probabilities = softmax(logits.cpu().numpy()[0])
            result = {class_names[i]: probabilities[i] for i in range(len(class_names))}
        else:
            prediction = clf(img_pil)
            result = {pred['label']: pred['score'] for pred in prediction}
        result_output = [model_id, model_name, result.get(class_names[1], 0.0), result.get(class_names[0], 0.0)]
        logger.info(result_output)
        for class_name in class_names:
            if class_name not in result:
                result[class_name] = 0.0
        if result[class_names[0]] >= confidence_threshold:
            label = f"AI, Confidence: {result[class_names[0]]:.4f}"
            result_output.append('AI')
        elif result[class_names[1]] >= confidence_threshold:
            label = f"Real, Confidence: {result[class_names[1]]:.4f}"
            result_output.append('REAL')
        else:
            label = "Uncertain Classification"
            result_output.append('UNCERTAIN')
    except Exception as e:
        label = f"Error: {str(e)}"
        result_output = [model_id, model_name, 0.0, 0.0, 'ERROR']  # Ensure result_output is assigned in case of error
    return label, result_output
def predict_image(img, confidence_threshold):
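    """Run every loaded detector on a PIL image.

    Returns the resized RGB image that was actually fed to the models plus a list of
    per-model result lists (see predict_with_model) for downstream HTML rendering.
    """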
    if not isinstance(img, Image.Image):
        raise ValueError(f"Expected a PIL Image, but got {type(img)}")
    if img.mode != 'RGB':
        img_pil = img.convert('RGB')
    else:
        img_pil = img
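    # Pre-resize once so every detector sees a consistent input: 256x256 for the
    # Swin-style checkpoints, 224x224 for the ViT-style ones. (The pipelines' own
    # processors may still resize/normalize again; these are the working resolutions
    # chosen here, not requirements verified against each checkpoint.)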
    img_pil = transforms.Resize((256, 256))(img_pil)
    img_pilvits = transforms.Resize((224, 224))(img_pil)
    label_1, result_1output = predict_with_model(img_pil, clf_1, CLASS_NAMES["model_1"], confidence_threshold, "SwinV2-base", 1)
    label_2, result_2output = predict_with_model(img_pilvits, clf_2, CLASS_NAMES["model_2"], confidence_threshold, "ViT-base Classifier", 2)
    label_3, result_3output = predict_with_model(img_pil, model_3, CLASS_NAMES["model_3"], confidence_threshold, "SDXL-Trained", 3, feature_extractor_3)
    label_4, result_4output = predict_with_model(img_pil, model_4, CLASS_NAMES["model_4"], confidence_threshold, "SDXL + FLUX", 4, feature_extractor_4)
    label_5, result_5output = predict_with_model(img_pilvits, clf_5, CLASS_NAMES["model_5"], confidence_threshold, "ViT-base Newcomer", 5)
    label_5b, result_5boutput = predict_with_model(img_pilvits, clf_5b, CLASS_NAMES["model_5b"], confidence_threshold, "ViT-base Newcomer", 6)
    label_6, result_6output = predict_with_model(img_pilvits, clf_6, CLASS_NAMES["model_6"], confidence_threshold, "Swin Midjourney/SDXL", 7)
    label_7, result_7output = predict_with_model(img_pilvits, clf_7, CLASS_NAMES["model_7"], confidence_threshold, "ViT", 8)
    combined_results = {
        "SwinV2/detect": label_1,
        "ViT/AI-vs-Real": label_2,
        "Swin/SDXL": label_3,
        "Swin/SDXL-FLUX": label_4,
        "prithivMLmods": label_5,
        "prithivMLmods-2-22": label_5b,
        "SwinMidSDXL": label_6,
        "ViT": label_7
    }
    print(combined_results)

    combined_outputs = [result_1output, result_2output, result_3output, result_4output, result_5output, result_5boutput, result_6output, result_7output]
    return img_pil, combined_outputs

# Define a function to generate the HTML content
def generate_results_html(results):
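    """Render the per-model results as a Tailwind-styled grid of tiles.

    `results` is the list of result lists produced by predict_image, one entry per model.
    """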
    def get_header_color(label):
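        # Maps a verdict label to a tuple of Tailwind class strings, roughly
        # (header, accent, badge background, badge text, hover accent);
        # generate_tile_html below uses indices 0, 2, 3 and 4 of this tuple.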
        if label == 'AI':
            return 'bg-red-500 text-red-700', 'bg-red-400', 'bg-red-100', 'bg-red-700 text-red-700', 'bg-red-200'
        elif label == 'REAL':
            return 'bg-green-500 text-green-700', 'bg-green-400', 'bg-green-100', 'bg-green-700 text-green-700', 'bg-green-200'
        elif label == 'UNCERTAIN':
            return 'bg-yellow-500 text-yellow-700', 'bg-yellow-400', 'bg-yellow-100', 'bg-yellow-700 text-yellow-700', 'bg-yellow-200'
        elif label == 'MAINTENANCE':
            return 'bg-blue-500 text-blue-700', 'bg-blue-400', 'bg-blue-100', 'bg-blue-700 text-blue-700', 'bg-blue-200'
        else:
            return 'bg-gray-300 text-gray-700', 'bg-gray-400', 'bg-gray-100', 'bg-gray-700 text-gray-700', 'bg-gray-200'
    def generate_tile_html(index, result, model_name, contributor, model_path):
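        # `result` is [model_id, model_name, real_conf, ai_conf, verdict]; the verdict
        # drives the tile colors and the two confidences drive the bar widths below.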
        label = result[-1]
        header_colors = get_header_color(label)
        real_conf = result[2]
        ai_conf = result[3]
        return f"""
        <div
            class="flex flex-col bg-gray-800 rounded-sm p-4 m-1 border border-gray-800 shadow-xs transition hover:shadow-lg dark:shadow-gray-700/25">
            <div
                class="-m-4 h-24 {header_colors[0]} rounded-sm rounded-b-none transition border group-hover:border-gray-100 group-hover:shadow-lg group-hover:{header_colors[4]}">
                <span class="text-gray-300 font-mono tracking-widest p-4 pb-3 block text-xs text-center">MODEL {index + 1}:</span>
                <span
                    class="flex w-30 mx-auto tracking-wide items-center justify-center rounded-full {header_colors[2]} px-1 py-0.5 {header_colors[3]}"
                >
                    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="3" stroke="currentColor" class="w-4 h-4 mr-2 -ml-3 group-hover:animate group-hover:animate-pulse">
                        {'<path stroke-linecap="round" stroke-linejoin="round" d="M9 12.75 11.25 15 15 9.75M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />' if label == 'REAL' else '<path stroke-linecap="round" stroke-linejoin="round" d="m9.75 9.75 4.5 4.5m0-4.5-4.5 4.5M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />'}
                    </svg>
                    <p class="whitespace-nowrap text-lg leading-normal font-bold text-center self-center align-middle py-px">{label}</p>
                </span>
            </div>
            <div>
                <div class="mt-4 relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-green-400 h-full rounded-none" style="width: {real_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{real_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
                <div class="relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-red-400 h-full rounded-none" style="width: {ai_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{ai_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
            </div>
            <div class="flex flex-col items-start">
                <h4 class="mt-4 text-sm font-semibold tracking-wide">{model_name}</h4>
                <div class="text-xs font-mono">Real: {real_conf:.4f}, AI: {ai_conf:.4f}</div>
                <div class="card-footer">
                    <a href="https://huggingface.co/{model_path}" target="_blank" class="mt-2 text-xs tracking-wide nowrap" style="font-size:0.66rem !important;">by @{contributor}</a>
                </div>
            </div>
        </div>
        """
    html_content = f"""
    <link href="https://unpkg.com/[email protected]/dist/tailwind.min.css" rel="stylesheet">
    <div class="container mx-auto">
        <div class="grid xl:grid-cols-4 md:grid-cols-4 grid-cols-1 gap-1">
            {generate_tile_html(0, results[0], "SwinV2 Based", "haywoodsloan", MODEL_PATHS["model_1"])}
            {generate_tile_html(1, results[1], "ViT Based", "Heem2", MODEL_PATHS["model_2"])}
            {generate_tile_html(2, results[2], "SDXL Dataset", "Organika", MODEL_PATHS["model_3"])}
            {generate_tile_html(3, results[3], "SDXL + FLUX", "cmckinle", MODEL_PATHS["model_4"])}
            {generate_tile_html(4, results[4], "ViT Based", "prithivMLmods", MODEL_PATHS["model_5"])}
            {generate_tile_html(5, results[5], "ViT Based, Newer Dataset", "prithivMLmods", MODEL_PATHS["model_5b"])}
            {generate_tile_html(6, results[6], "Swin, Midj + SDXL", "ideepankarsharma2003", MODEL_PATHS["model_6"])}
            {generate_tile_html(7, results[7], "ViT", "temp", MODEL_PATHS["model_7"])}
        </div>
    </div>
    """
    return html_content
def predict_image_with_html(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
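    """Gradio handler: optionally augment the upload, classify it, and build the forensic views.

    Returns the processed image, a gallery of ELA/gradient/minmax images, and the results HTML.
    """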
    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)
    img_np = np.array(img_pil)  # Convert PIL Image to NumPy array
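    # Both forensic preprocessors receive the same NumPy view of the resized RGB image
    # that the classifiers saw, which keeps the gradient and minmax outputs consistent
    # with the prediction inputs.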
    gradient_image = gradient_processing(img_np)  # Added gradient processing
    minmax_image = minmax_preprocess(img_np)  # Added MinMax processing

    # Generate ELA images with different presets
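    # The scale/alpha pairs below are assumed to be the ELA re-compression quality and
    # overlay blend strength accepted by utils.utils.ELA; three presets give a quick
    # visual sweep from subtle to aggressive error-level amplification.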
    ela_img_1 = ELA(img_pil, scale=100, alpha=0.66)
    ela_img_2 = ELA(img_pil, scale=75, alpha=0.8)
    ela_img_3 = ELA(img_pil, scale=50, alpha=0.5)
    ela_images = [ela_img_1, ela_img_2, ela_img_3, gradient_image, minmax_image]

    html_content = generate_results_html(results)
    return img_pil, ela_images, html_content
with gr.Blocks() as iface:
    with gr.Tab("AI Image Detection"):
        gr.Markdown("# AI Generated Image / Deepfake Detection Models Evaluation")
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(label="Upload Image to Analyze", sources=['upload'], type='pil')
                with gr.Accordion("Settings", open=False, elem_id="settings_accordion"):
                    augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
                    rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                    noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                    sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
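                    # If both class scores fall below this threshold, the model is reported as UNCERTAIN.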
                    confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
                predict_button = gr.Button("Predict")
                image_output = gr.Image(label="Processed Image", visible=True)
                ela_gallery = gr.Gallery(label="ELA Processed Images", visible=True)
            with gr.Column(scale=2):
                with gr.Accordion("Project OpenSight - Model Evaluations & Playground", open=False, elem_id="project_accordion"):
                    gr.Markdown("## OpenSight is a SOTA generated-image detection model in pre-release preparation.\n\nThis HF Space is a temporary home for us and the public to evaluate the shortcomings of current open-source models.\n\n<-- Feel free to play around by starting with an image as we prepare our formal announcement.")
                # Custom HTML component to display the model result tiles
                results_html = gr.HTML(label="Model Predictions")

        outputs = [image_output, ela_gallery, results_html]
        # Show/hide the augmentation sliders based on the selected augmentation methods
        augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])

        predict_button.click(
            fn=predict_image_with_html,
            inputs=inputs,
            outputs=outputs
        )
        predict_button.click(
            fn=None,
            js="() => {document.getElementById('project_accordion').open = false;}",  # Close the project accordion
            inputs=[],
            outputs=[]
        )
    with gr.Tab("Another Interface"):
        # Add components for the second interface here
        gr.Markdown("# Another Interface")
        # Example: Add a simple text input and output
        text_input = gr.Textbox(label="Enter Text")
        text_output = gr.Textbox(label="Processed Text")
        text_button = gr.Button("Process Text")
        text_button.click(
            fn=lambda x: x.upper(),  # Example function to convert text to uppercase
            inputs=text_input,
            outputs=text_output
        )

# Launch the interface
iface.launch()