|
import os |
|
from typing import Literal |
|
import spaces |
|
import gradio as gr |
|
import modelscope_studio.components.antd as antd |
|
import modelscope_studio.components.antdx as antdx |
|
import modelscope_studio.components.base as ms |
|
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification |
|
from torchvision import transforms |
|
import torch |
|
from PIL import Image |
|
import numpy as np |
|
import io |
|
import logging |
|
from utils.utils import softmax, augment_image, convert_pil_to_bytes |
|
from utils.gradient import gradient_processing |
|
from utils.minmax import preprocess as minmax_preprocess |
|
from utils.ela import genELA as ELA |
|
from forensics.registry import register_model, MODEL_REGISTRY |
|
|
|
|
|
|
|
# Module-wide logging: INFO level on the root logger, plus a logger named
# after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Torch device used for every model below: CUDA when available, else CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
|
|
# Style dictionaries for layout regions (header, content, sider, footer and
# the enclosing layout). Keys are camelCase style property names.
header_style = {
    "textAlign": "center",
    "color": "#fff",
    "height": 64,
    "paddingInline": 48,
    "lineHeight": "64px",
    "backgroundColor": "#4096ff",
}

content_style = {
    "textAlign": "center",
    "minHeight": 120,
    "lineHeight": "120px",
    "color": "#fff",
    "backgroundColor": "#0958d9",
}

sider_style = {
    "textAlign": "center",
    "lineHeight": "120px",
    "color": "#fff",
    "backgroundColor": "#1677ff",
}

footer_style = {
    "textAlign": "center",
    "color": "#fff",
    "backgroundColor": "#4096ff",
}

layout_style = {
    "borderRadius": 8,
    "overflow": "hidden",
    "width": "calc(100% - 8px)",
    "maxWidth": "calc(100% - 8px)",
}
|
|
|
# Model identifier -> path string handed to the `from_pretrained` / `pipeline`
# calls further down in this file.
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector_v1.1",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4",
}

# Model identifier -> class labels passed to `register_model` for that model.
# The logits postprocessors index the probability vector in this order.
CLASS_NAMES = {
    "model_1": ["artificial", "real"],
    "model_2": ["AI Image", "Real Image"],
    "model_3": ["AI", "Real"],
    "model_4": ["AI", "Real"],
    "model_5": ["Realism", "Deepfake"],
    "model_5b": ["Real", "Deepfake"],
    "model_6": ["ai_gen", "human"],
    "model_7": ["Fake", "Real"],
}
|
|
|
def preprocess_resize_256(image):
    """Return *image* as RGB, resized to 256x256.

    An image whose ``mode`` is not ``'RGB'`` is converted first; the resize is
    performed by ``transforms.Resize((256, 256))``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((256, 256))
    return resize(rgb_image)
|
|
|
def preprocess_resize_224(image):
    """Return *image* as RGB, resized to 224x224.

    An image whose ``mode`` is not ``'RGB'`` is converted first; the resize is
    performed by ``transforms.Resize((224, 224))``.
    """
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((224, 224))
    return resize(rgb_image)
|
|
|
def postprocess_pipeline(prediction, class_names):
    """Collapse pipeline predictions into a ``{label: score}`` dict.

    Args:
        prediction: Iterable of mappings, each with ``'label'`` and
            ``'score'`` keys.
        class_names: Unused; present so this function shares a signature
            with the logits-based postprocessors.

    Returns:
        Dict mapping each label to its score. If a label repeats, the last
        score wins.
    """
    scores = {}
    for item in prediction:
        scores[item['label']] = item['score']
    return scores
|
|
|
def postprocess_logits(outputs, class_names):
    """Turn raw model outputs into a ``{class_name: probability}`` dict.

    Takes the first row of ``outputs.logits`` (moved to CPU and converted to
    NumPy), applies ``softmax`` and pairs the i-th probability with the i-th
    entry of ``class_names``.
    """
    first_row = outputs.logits.cpu().numpy()[0]
    probs = softmax(first_row)
    return {label: probs[idx] for idx, label in enumerate(class_names)}
|
|
|
|
|
# Model 1: Swinv2 classifier with an explicitly loaded image processor,
# wrapped in an image-classification pipeline. Registered with the 256x256
# resize preprocess and the pipeline postprocess.
image_processor_1 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_1"],
    use_fast=True,
)
model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"])
model_1 = model_1.to(device)
clf_1 = pipeline(
    task="image-classification",
    model=model_1,
    image_processor=image_processor_1,
    device=device,
)
register_model("model_1", clf_1, preprocess_resize_256, postprocess_pipeline, CLASS_NAMES["model_1"])
|
|
|
# Model 2: image-classification pipeline built directly from the model path.
# Registered with the 224x224 resize preprocess and the pipeline postprocess.
clf_2 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_2"],
    device=device,
)
register_model("model_2", clf_2, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_2"])
|
|
|
|
|
# Model 3: feature extractor + classification model invoked manually (no
# pipeline); the registered callable returns the raw model outputs.
# NOTE(review): `device=device` is forwarded to `from_pretrained` as in the
# original; confirm the feature extractor actually makes use of it.
feature_extractor_3 = AutoFeatureExtractor.from_pretrained(
    MODEL_PATHS["model_3"],
    device=device,
)
model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"])
model_3 = model_3.to(device)


def preprocess_256(image):
    """Return *image* as RGB, resized to 256x256."""
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    resize = transforms.Resize((256, 256))
    return resize(rgb_image)


def postprocess_logits_model3(outputs, class_names):
    """Softmax the first row of ``outputs.logits`` and key it by class name."""
    first_row = outputs.logits.cpu().numpy()[0]
    probs = softmax(first_row)
    return {label: probs[idx] for idx, label in enumerate(class_names)}


def model3_infer(image):
    """Run model 3 on *image* with gradients disabled; return its outputs."""
    encoded = feature_extractor_3(image, return_tensors="pt").to(device)
    with torch.no_grad():
        return model_3(**encoded)


register_model("model_3", model3_infer, preprocess_256, postprocess_logits_model3, CLASS_NAMES["model_3"])
|
|
|
# Model 4: same manual feature-extractor + model flow as model 3, registered
# with the shared `preprocess_256` helper.
# NOTE(review): `device=device` is forwarded to `from_pretrained` as in the
# original; confirm the feature extractor actually makes use of it.
feature_extractor_4 = AutoFeatureExtractor.from_pretrained(
    MODEL_PATHS["model_4"],
    device=device,
)
model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"])
model_4 = model_4.to(device)


def model4_infer(image):
    """Run model 4 on *image* with gradients disabled; return its outputs."""
    encoded = feature_extractor_4(image, return_tensors="pt").to(device)
    with torch.no_grad():
        return model_4(**encoded)


def postprocess_logits_model4(outputs, class_names):
    """Softmax the first row of ``outputs.logits`` and key it by class name."""
    first_row = outputs.logits.cpu().numpy()[0]
    probs = softmax(first_row)
    return {label: probs[idx] for idx, label in enumerate(class_names)}


register_model("model_4", model4_infer, preprocess_256, postprocess_logits_model4, CLASS_NAMES["model_4"])
|
|
|
# Model 5: image-classification pipeline built directly from the model path.
# Registered with the 224x224 resize preprocess and the pipeline postprocess.
clf_5 = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5"],
    device=device,
)
register_model("model_5", clf_5, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5"])
|
|
|
# Model 5b: image-classification pipeline built directly from the model path.
# Registered with the 224x224 resize preprocess and the pipeline postprocess.
clf_5b = pipeline(
    task="image-classification",
    model=MODEL_PATHS["model_5b"],
    device=device,
)
register_model("model_5b", clf_5b, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5b"])
|
|
|
# Model 6: Swin classifier with an explicitly loaded image processor, wrapped
# in an image-classification pipeline. Registered with the 224x224 resize
# preprocess and the pipeline postprocess.
image_processor_6 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_6"],
    use_fast=True,
)
model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"])
model_6 = model_6.to(device)
clf_6 = pipeline(
    task="image-classification",
    model=model_6,
    image_processor=image_processor_6,
    device=device,
)
register_model("model_6", clf_6, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_6"])
|
|
|
# Model 7: auto-resolved classification model with an explicitly loaded image
# processor, wrapped in an image-classification pipeline. Registered with the
# 224x224 resize preprocess and the pipeline postprocess.
image_processor_7 = AutoImageProcessor.from_pretrained(
    MODEL_PATHS["model_7"],
    use_fast=True,
)
model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"])
model_7 = model_7.to(device)
clf_7 = pipeline(
    task="image-classification",
    model=model_7,
    image_processor=image_processor_7,
    device=device,
)
register_model("model_7", clf_7, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_7"])
|
|
|
|
|
|
|
def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
    """Run one registered model on *image* and return its postprocessed result.

    Looks up ``model_id`` in ``MODEL_REGISTRY`` and chains the entry's
    ``preprocess``, ``model`` and ``postprocess`` callables.

    Args:
        image: Input image handed to the entry's ``preprocess`` callable.
        model_id: Key into ``MODEL_REGISTRY``.
        confidence_threshold: Accepted for interface compatibility; it is not
            used by this function.

    Returns:
        The value returned by the entry's ``postprocess`` on success, or
        ``{"error": <message>}`` if any stage raised.

    Raises:
        KeyError: If ``model_id`` is not present in ``MODEL_REGISTRY``.
    """
    entry = MODEL_REGISTRY[model_id]
    try:
        # Preprocessing sits inside the try so that one model failing on an
        # unusual input yields an error entry instead of aborting the whole
        # multi-model run.
        prepared = entry.preprocess(image)
        raw_output = entry.model(prepared)
        return entry.postprocess(raw_output, entry.class_names)
    except Exception as e:
        # Per-model isolation boundary: keep the traceback in the logs and
        # report the failure to the caller as data.
        logger.exception("Inference failed for %s", model_id)
        return {"error": str(e)}
|
|
|
|
|
|
|
def predict_image(img, confidence_threshold):
    """Run every registered detector on *img*.

    Returns:
        A tuple ``(img, results)`` where ``results`` holds one ``infer``
        result per model, in the fixed order listed below.
    """
    results = []
    for model_id in (
        "model_1", "model_2", "model_3", "model_4",
        "model_5", "model_5b", "model_6", "model_7",
    ):
        results.append(infer(img, model_id, confidence_threshold))
    return img, results
|
|
|
|
|
|
|
def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
    """Optionally augment *img*, run all detectors and build forensic views.

    Args:
        img: Input image from the UI (the image component is declared with
            ``type='pil'``); ``None`` when nothing was uploaded.
        confidence_threshold: Forwarded to ``predict_image``.
        augment_methods: Augmentation names; when empty or falsy no
            augmentation is applied.
        rotate_degrees: Forwarded to ``augment_image``.
        noise_level: Forwarded to ``augment_image``.
        sharpen_strength: Forwarded to ``augment_image``.

    Returns:
        A tuple ``(img_pil, forensics_images, results)``: the (possibly
        augmented) image, the list of forensic images for the gallery, and
        the per-model results from ``predict_image``.

    Raises:
        gr.Error: If ``img`` is ``None``.
    """
    # Clicking a button with no image delivers None; fail with a readable
    # message instead of an AttributeError deep inside preprocessing.
    if img is None:
        raise gr.Error("Please upload an image before running a prediction.")

    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)

    img_np = np.array(img_pil)   # possibly augmented image
    img_np_og = np.array(img)    # original input, before any augmentation

    # Gradient and min/max views are computed from the (possibly augmented)
    # image that was fed to the models.
    gradient_image = gradient_processing(img_np)
    minmax_image = minmax_preprocess(img_np)

    # The ELA passes are computed from the original, un-augmented input.
    ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
    ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
    ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)

    forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, minmax_image]

    return img_pil, forensics_images, results
|
|
|
# Gradio UI definition and launch.
# NOTE(review): SOURCE carries no indentation, so the nesting of the context
# managers below is reconstructed from statement order — confirm against the
# intended layout.
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as iface:
    with ms.Application() as app:
        with antd.ConfigProvider():
            # Welcome banner shown above the tabs.
            antdx.Welcome(
                icon=
                "https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
                title="Welcome to Project OpenSight",
                description=
                "The OpenSight aims to be an open-source SOTA generated image detection model. This HF Space is not only an introduction but a educational playground for the public to evaluate and challenge current open source models. **Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds.** "
            )
            with gr.Tab("👀 Detection Models Eval / Playground"):
                gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - **Space will be upgraded shortly;** inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The **Community Forensics** mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")

                with gr.Row():
                    # Left column: image upload, optional settings, action buttons.
                    with gr.Column(scale=1):
                        image_input = gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil')
                        with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
                            augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
                            # Per-method sliders start hidden; the change
                            # handlers below reveal them when the matching
                            # method is ticked.
                            rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                            noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                            sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
                            confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                        # Order matches the parameters of predict_image_with_json.
                        inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
                        predict_button = gr.Button("Predict")
                        augment_button = gr.Button("Augment & Predict")
                        # Hidden component that receives the processed image output.
                        image_output = gr.Image(label="Processed Image", visible=False)

                    # Right column: prediction table and forensic image gallery.
                    with gr.Column(scale=2):
                        results_table = gr.Dataframe(label="Model Predictions", headers=None, datatype="auto")
                        forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery")

                        # Order matches the tuple returned by predict_image_with_json.
                        outputs = [image_output, forensics_gallery, results_table]

                # Show each augmentation slider only while its method is selected.
                augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
                augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
                augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])

                # "Predict" uses whatever augmentation methods are currently ticked.
                predict_button.click(
                    fn=predict_image_with_json,
                    inputs=inputs,
                    outputs=outputs
                )
                # "Augment & Predict" does not read augment_checkboxgroup: it
                # supplies a separate hidden CheckboxGroup preset to all three
                # methods as the augment_methods input.
                augment_button.click(
                    fn=predict_image_with_json,
                    inputs=[
                        image_input,
                        confidence_slider,
                        gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False),
                        rotate_slider,
                        noise_slider,
                        sharpen_slider
                    ],
                    outputs=outputs
                )
            with gr.Tab("👑 Community Forensics Preview"):
                # Loads another Space into this tab; the returned object is
                # not referenced again in the visible code.
                temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")

            with gr.Tab("🥇 Leaderboard"):
                gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")

# Start the Gradio app (runs at import time; there is no __main__ guard).
iface.launch()