|
"""Project OpenSight: a Gradio Space that runs an uploaded image through
several open-source AI-image/deepfake detectors and shows the verdicts
alongside basic forensic views (ELA, gradient, min-max)."""

import logging

import spaces  # must be imported before torch on ZeroGPU Spaces

import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import numpy as np
import torch
from PIL import Image
from torchvision import transforms
from transformers import (
    AutoFeatureExtractor,
    AutoImageProcessor,
    AutoModelForImageClassification,
    SwinForImageClassification,
    Swinv2ForImageClassification,
    pipeline,
)

from utils.utils import softmax, augment_image
from utils.gradient import gradient_processing
from utils.minmax import preprocess as minmax_preprocess
from utils.ela import genELA as ELA
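# The utils.* modules are project-local helpers: softmax over raw logits,
# image augmentation (rotate / add_noise / sharpen), and the forensic
# transforms (gradient map, min-max preprocessing, Error Level Analysis).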
|
|
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Use the GPU when available; every model and input tensor is moved here.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
header_style = {
    "textAlign": 'center',
    "color": '#fff',
    "height": 64,
    "paddingInline": 48,
    "lineHeight": '64px',
    "backgroundColor": '#4096ff',
}

content_style = {
    "textAlign": 'center',
    "minHeight": 120,
    "lineHeight": '120px',
    "color": '#fff',
    "backgroundColor": '#0958d9',
}

sider_style = {
    "textAlign": 'center',
    "lineHeight": '120px',
    "color": '#fff',
    "backgroundColor": '#1677ff',
}

footer_style = {
    "textAlign": 'center',
    "color": '#fff',
    "backgroundColor": '#4096ff',
}

layout_style = {
    "borderRadius": 8,
    "overflow": 'hidden',
    "width": 'calc(100% - 8px)',
    "maxWidth": 'calc(100% - 8px)',
}
|
|
|
# Hugging Face Hub repo ids for each detector.
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4"
}
|
|
|
# Label order convention: index 0 is the AI/fake class, index 1 the real
# class. The strings must match each checkpoint's output labels exactly.
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Deepfake', 'Realism'],
    "model_5b": ['Deepfake', 'Real'],
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}
|
|
def load_models():
    """Load every detector once at startup and wrap each in a pipeline."""
    image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
    model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
    clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)

    clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)

    # Models 3 and 4 are run manually in predict_with_model, so keep the bare
    # feature extractor and model for them instead of a pipeline.
    feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"])
    model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

    feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"])
    model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)

    clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
    clf_5b = pipeline("image-classification", model=MODEL_PATHS["model_5b"], device=device)

    image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
    model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
    clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)

    image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
    model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
    clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)

    return clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, clf_7


clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, clf_7 = load_models()
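# Models are loaded eagerly at import time so the first request does not pay
# the download/initialization cost.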
|
|
|
@spaces.GPU(duration=10)
def predict_with_model(img_pil, clf, class_names, confidence_threshold, model_name, model_id, feature_extractor=None):
    """Run one detector and return a display label plus a result row of the
    form [model_id, model_name, real_conf, ai_conf, verdict]."""
    try:
        if feature_extractor:
            # Bare model: preprocess, forward pass, then softmax over logits.
            inputs = feature_extractor(img_pil, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = clf(**inputs)
            logits = outputs.logits
            probabilities = softmax(logits.cpu().numpy()[0])
            result = {class_names[i]: probabilities[i] for i in range(len(class_names))}
        else:
            # Pipeline: returns a list of {'label': ..., 'score': ...} dicts.
            prediction = clf(img_pil)
            result = {pred['label']: pred['score'] for pred in prediction}

        # Make sure both expected classes are present before thresholding.
        for class_name in class_names:
            if class_name not in result:
                result[class_name] = 0.0

        result_output = [model_id, model_name, result[class_names[1]], result[class_names[0]]]
        logger.info(result_output)
        if result[class_names[0]] >= confidence_threshold:
            label = f"AI, Confidence: {result[class_names[0]]:.4f}"
            result_output.append('AI')
        elif result[class_names[1]] >= confidence_threshold:
            label = f"Real, Confidence: {result[class_names[1]]:.4f}"
            result_output.append('REAL')
        else:
            label = "Uncertain Classification"
            result_output.append('UNCERTAIN')
    except Exception as e:
        logger.error(f"Error in {model_name}: {e}")
        label = f"Error: {str(e)}"
        result_output = [model_id, model_name, 0.0, 0.0, 'ERROR']
    return label, result_output
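# Illustrative return value: a confident "real" verdict from model 1 would
# come back roughly as
#   ("Real, Confidence: 0.9873", [1, "SwinV2-base", 0.9873, 0.0127, 'REAL'])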
|
|
|
@spaces.GPU(duration=10)
def predict_image(img, confidence_threshold):
    """Run every detector against one image and collect the per-model rows."""
    if not isinstance(img, Image.Image):
        raise ValueError(f"Expected a PIL Image, but got {type(img)}")
    img_pil = img.convert('RGB') if img.mode != 'RGB' else img
    img_pil = transforms.Resize((256, 256))(img_pil)
    # The ViT-based checkpoints expect 224x224 inputs.
    img_pilvits = transforms.Resize((224, 224))(img_pil)

    label_1, result_1output = predict_with_model(img_pil, clf_1, CLASS_NAMES["model_1"], confidence_threshold, "SwinV2-base", 1)
    label_2, result_2output = predict_with_model(img_pilvits, clf_2, CLASS_NAMES["model_2"], confidence_threshold, "ViT-base Classifier", 2)
    label_3, result_3output = predict_with_model(img_pil, model_3, CLASS_NAMES["model_3"], confidence_threshold, "SDXL-Trained", 3, feature_extractor_3)
    label_4, result_4output = predict_with_model(img_pil, model_4, CLASS_NAMES["model_4"], confidence_threshold, "SDXL + FLUX", 4, feature_extractor_4)
    label_5, result_5output = predict_with_model(img_pilvits, clf_5, CLASS_NAMES["model_5"], confidence_threshold, "ViT-base Newcomer", 5)
    label_5b, result_5boutput = predict_with_model(img_pilvits, clf_5b, CLASS_NAMES["model_5b"], confidence_threshold, "ViT-base Newcomer (Exp-02-22)", 6)
    label_6, result_6output = predict_with_model(img_pilvits, clf_6, CLASS_NAMES["model_6"], confidence_threshold, "Swin Midjourney/SDXL", 7)
    label_7, result_7output = predict_with_model(img_pilvits, clf_7, CLASS_NAMES["model_7"], confidence_threshold, "ViT", 8)
|
    combined_results = {
        "SwinV2/detect": label_1,
        "ViT/AI-vs-Real": label_2,
        "Swin/SDXL": label_3,
        "Swin/SDXL-FLUX": label_4,
        "prithivMLmods": label_5,
        "prithivMLmods-2-22": label_5b,
        "SwinMidSDXL": label_6,
        "ViT": label_7,
    }
    logger.info(combined_results)

    combined_outputs = [result_1output, result_2output, result_3output, result_4output, result_5output, result_5boutput, result_6output, result_7output]
    return img_pil, combined_outputs
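# Minimal programmatic usage sketch (hypothetical file path; bypasses the UI):
#   from PIL import Image
#   img, rows = predict_image(Image.open("sample.jpg"), confidence_threshold=0.75)
#   for row in rows:
#       print(row)  # [model_id, model_name, real_conf, ai_conf, verdict]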
|
|
def generate_results_html(results):
    def get_header_color(label):
        # Tailwind class groups: (header bg, accent, badge bg, badge text, hover bg).
        if label == 'AI':
            return 'bg-red-500 text-red-700', 'bg-red-400', 'bg-red-100', 'bg-red-700 text-red-700', 'bg-red-200'
        elif label == 'REAL':
            return 'bg-green-500 text-green-700', 'bg-green-400', 'bg-green-100', 'bg-green-700 text-green-700', 'bg-green-200'
        elif label == 'UNCERTAIN':
            return 'bg-yellow-500 text-yellow-700', 'bg-yellow-400', 'bg-yellow-100', 'bg-yellow-700 text-yellow-700', 'bg-yellow-200'
        elif label == 'MAINTENANCE':
            return 'bg-blue-500 text-blue-700', 'bg-blue-400', 'bg-blue-100', 'bg-blue-700 text-blue-700', 'bg-blue-200'
        else:
            return 'bg-gray-300 text-gray-700', 'bg-gray-400', 'bg-gray-100', 'bg-gray-700 text-gray-700', 'bg-gray-200'
|
    def generate_tile_html(index, result, model_name, contributor, model_path):
        label = result[-1]
        header_colors = get_header_color(label)
        real_conf = result[2]
        ai_conf = result[3]
        return f"""
        <div
            class="flex flex-col bg-gray-800 rounded-sm p-4 m-1 border border-gray-800 shadow-xs transition hover:shadow-lg dark:shadow-gray-700/25">
            <div
                class="-m-4 h-24 {header_colors[0]} rounded-sm rounded-b-none transition border group-hover:border-gray-100 group-hover:shadow-lg group-hover:{header_colors[4]}">
                <span class="text-gray-300 font-mono tracking-widest p-4 pb-3 block text-xs text-center">MODEL {index + 1}:</span>
                <span
                    class="flex w-30 mx-auto tracking-wide items-center justify-center rounded-full {header_colors[2]} px-1 py-0.5 {header_colors[3]}"
                >
                    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="3" stroke="currentColor" class="w-4 h-4 mr-2 -ml-3 group-hover:animate group-hover:animate-pulse">
                        {'<path stroke-linecap="round" stroke-linejoin="round" d="M9 12.75 11.25 15 15 9.75M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />' if label == 'REAL' else '<path stroke-linecap="round" stroke-linejoin="round" d="m9.75 9.75 4.5 4.5m0-4.5-4.5 4.5M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />'}
                    </svg>
                    <p class="text-base whitespace-nowrap leading-normal font-bold text-center self-center align-middle py-px">{label}</p>
                </span>
            </div>
            <div>
                <!-- Confidence bars: green for "real", red for "AI"; widths scale with score. -->
                <div class="mt-4 relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-green-400 h-full rounded-none" style="width: {real_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{real_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
                <div class="relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-red-400 h-full rounded-none" style="width: {ai_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{ai_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
            </div>
            <div class="flex flex-col items-start">
                <h4 class="mt-4 text-sm font-semibold tracking-wide">{model_name}</h4>
                <div class="text-xs font-mono">Real: {real_conf:.4f}, AI: {ai_conf:.4f}</div>
                <div class="card-footer">
                    <a href="https://huggingface.co/{model_path}" target="_blank" class="mt-2 text-xs text-nowrap nowrap" style="font-size:0.66rem !important;">by @{contributor}</a>
                </div>
            </div>
        </div>
        """
|
    html_content = f"""
    <link href="https://unpkg.com/[email protected]/dist/tailwind.min.css" rel="stylesheet">
    <div class="container mx-auto">
        <div class="grid xl:grid-cols-4 md:grid-cols-4 grid-cols-1 gap-1">
            {generate_tile_html(0, results[0], "SwinV2 Based", "haywoodsloan", MODEL_PATHS["model_1"])}
            {generate_tile_html(1, results[1], "ViT Based", "Heem2", MODEL_PATHS["model_2"])}
            {generate_tile_html(2, results[2], "SDXL Dataset", "Organika", MODEL_PATHS["model_3"])}
            {generate_tile_html(3, results[3], "SDXL + FLUX", "cmckinle", MODEL_PATHS["model_4"])}
            {generate_tile_html(4, results[4], "ViT Based", "prithivMLmods", MODEL_PATHS["model_5"])}
            {generate_tile_html(5, results[5], "ViT Based, Newer Dataset", "prithivMLmods", MODEL_PATHS["model_5b"])}
            {generate_tile_html(6, results[6], "Swin, Midj + SDXL", "ideepankarsharma2003", MODEL_PATHS["model_6"])}
            {generate_tile_html(7, results[7], "ViT", "date3k2", MODEL_PATHS["model_7"])}
        </div>
    </div>
    """
    return html_content
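# Each results[i] row comes straight from predict_with_model:
# [model_id, model_name, real_conf, ai_conf, verdict]. Only indices 2, 3 and
# -1 are read when rendering a tile.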
|
|
def predict_image_with_html(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)
    img_np = np.array(img_pil)

    # Forensic views: gradient map, min-max preprocessing, and two ELA passes
    # at different scales/opacities.
    gradient_image = gradient_processing(img_np)
    minmax_image = minmax_preprocess(img_np)
    ela_img_1 = ELA(img_pil, scale=100, alpha=0.66)
    ela_img_2 = ELA(img_pil, scale=50, alpha=0.5)

    forensics_images = [img_pil, ela_img_1, ela_img_2, gradient_image, minmax_image]
    html_content = generate_results_html(results)
    return img_pil, forensics_images, html_content
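# Error Level Analysis (ELA) highlights regions whose recompression error
# differs from the rest of the image, a common cue for local edits; the two
# passes above vary the scale and overlay alpha.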
|
|
|
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as iface:
    with ms.Application() as app:
        with antd.ConfigProvider():
            antdx.Welcome(
                icon="https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
                title="Welcome to Project OpenSight",
                description="OpenSight aims to be an open-source, SOTA generated-image detection model. This HF Space is not only an introduction but an educational playground for the public to evaluate and challenge current open-source models. **Space will be upgraded shortly; inference on all 8 models should take about 1.2 seconds.**"
            )

    with gr.Tab("👀 Detection Models Eval / Playground"):
        gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 8 models should take about 1.2 seconds once we're back on CUDA.\n - The **Community Forensics** model, the mother of all detection models, is now available for inference; head to the middle tab above.\n - Lots of exciting things coming up, stay tuned!")
|
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(label="Upload Image to Analyze", sources=['upload'], type='pil')
                with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
                    augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
                    rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                    noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                    sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
                confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
                predict_button = gr.Button("Predict")
                augment_button = gr.Button("Augment & Predict")
                image_output = gr.Image(label="Processed Image", visible=False)

            with gr.Column(scale=2):
                results_html = gr.HTML(label="Model Predictions")
                forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[5], rows=[1], container=False, height="auto", object_fit="contain", elem_id="post-gallery")

        outputs = [image_output, forensics_gallery, results_html]
|
        # Each augmentation slider is only shown when its method is selected.
        augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])

        predict_button.click(
            fn=predict_image_with_html,
            inputs=inputs,
            outputs=outputs
        )
        # "Augment & Predict" forces all three augmentation methods on via a
        # hidden CheckboxGroup passed as a constant input.
        augment_button.click(
            fn=predict_image_with_html,
            inputs=[
                image_input,
                confidence_slider,
                gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False),
                rotate_slider,
                noise_slider,
                sharpen_slider
            ],
            outputs=outputs
        )
        # Collapse the settings accordion once a prediction is kicked off.
        predict_button.click(
            fn=None,
            js="() => { const el = document.getElementById('settings_accordion'); if (el) el.open = false; }",
            inputs=[],
            outputs=[]
        )
|
    with gr.Tab("👑 Community Forensics Preview"):
        temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")

    with gr.Tab("🥇 Leaderboard"):
        gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")


iface.launch()