OpenSight-Deepfake-Detection-Models-Playground

Running

OpenSight-Deepfake-Detection-Models-Playground / app_mcp.py

LPX

major(huge refactoring)

932e7b4 8 days ago

13.6 kB

	import os
	from typing import Literal
	import spaces
	import gradio as gr
	import modelscope_studio.components.antd as antd
	import modelscope_studio.components.antdx as antdx
	import modelscope_studio.components.base as ms
	from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
	from torchvision import transforms
	import torch
	from PIL import Image
	import numpy as np
	import io
	import logging
	from utils.utils import softmax, augment_image, convert_pil_to_bytes
	from utils.gradient import gradient_processing
	from utils.minmax import preprocess as minmax_preprocess
	from utils.ela import genELA as ELA
	from forensics.registry import register_model, MODEL_REGISTRY


	# Root logging is configured at INFO; this module logs through a logger
	# named after the module itself.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Prefer CUDA when torch reports a usable GPU, otherwise fall back to CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# Inline-style dictionaries using camelCase, CSS-like keys.
	# NOTE(review): nothing in the visible part of this file reads them.
	header_style = dict(
	    textAlign="center",
	    color="#fff",
	    height=64,
	    paddingInline=48,
	    lineHeight="64px",
	    backgroundColor="#4096ff",
	)

	content_style = dict(
	    textAlign="center",
	    minHeight=120,
	    lineHeight="120px",
	    color="#fff",
	    backgroundColor="#0958d9",
	)

	sider_style = dict(
	    textAlign="center",
	    lineHeight="120px",
	    color="#fff",
	    backgroundColor="#1677ff",
	)

	footer_style = dict(
	    textAlign="center",
	    color="#fff",
	    backgroundColor="#4096ff",
	)

	layout_style = dict(
	    borderRadius=8,
	    overflow="hidden",
	    width="calc(100% - 8px)",
	    maxWidth="calc(100% - 8px)",
	)
	# Hub repository id for every detector, keyed by the id used when the model
	# is registered further down in this file.
	MODEL_PATHS = dict(
	    model_1="haywoodsloan/ai-image-detector-deploy",
	    model_2="Heem2/AI-vs-Real-Image-Detection",
	    model_3="Organika/sdxl-detector",
	    model_4="cmckinle/sdxl-flux-detector_v1.1",
	    model_5="prithivMLmods/Deep-Fake-Detector-v2-Model",
	    model_5b="prithivMLmods/Deepfake-Detection-Exp-02-22",
	    model_6="ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
	    model_7="date3k2/vit-real-fake-classification-v4",
	)

	# Class labels per model, in the index order used when logits are mapped
	# to names (see the logits post-processing helpers).
	CLASS_NAMES = {
	    "model_1": ["artificial", "real"],
	    "model_2": ["AI Image", "Real Image"],
	    "model_3": ["AI", "Real"],
	    "model_4": ["AI", "Real"],
	    "model_5": ["Realism", "Deepfake"],
	    "model_5b": ["Real", "Deepfake"],
	    "model_6": ["ai_gen", "human"],
	    "model_7": ["Fake", "Real"],
	}

	def preprocess_resize_256(image):
	    """Return ``image`` in RGB mode, resized to 256x256.

	    An image whose ``mode`` is already ``'RGB'`` is resized as-is; any other
	    mode is converted with ``image.convert('RGB')`` first. The resize is done
	    with torchvision's ``transforms.Resize``.
	    """
	    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
	    resize = transforms.Resize((256, 256))
	    return resize(rgb_image)

	def preprocess_resize_224(image):
	    """Return ``image`` in RGB mode, resized to 224x224.

	    An image whose ``mode`` is already ``'RGB'`` is resized as-is; any other
	    mode is converted with ``image.convert('RGB')`` first. The resize is done
	    with torchvision's ``transforms.Resize``.
	    """
	    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
	    resize = transforms.Resize((224, 224))
	    return resize(rgb_image)

	def postprocess_pipeline(prediction, class_names):
	    """Collapse pipeline-style predictions into a ``{label: score}`` dict.

	    ``prediction`` is iterated as mappings that each provide a ``'label'``
	    and a ``'score'`` key. When a label occurs more than once, the last
	    score wins. ``class_names`` is accepted so the signature matches the
	    other post-processors, but it is not used here.
	    """
	    scores_by_label = {}
	    for item in prediction:
	        scores_by_label[item['label']] = item['score']
	    return scores_by_label

	def postprocess_logits(outputs, class_names):
	    """Convert raw classifier logits into a ``{class_name: probability}`` dict.

	    The first row of ``outputs.logits`` is moved to the CPU, converted to a
	    NumPy array and passed through ``softmax``. Entry ``i`` of the result is
	    paired with ``class_names[i]``.

	    Each probability is cast to a built-in ``float`` so the result holds the
	    same plain Python numbers as ``postprocess_pipeline``, not array
	    scalars, whatever scalar type ``softmax`` returns.

	    Raises:
	        IndexError: if ``class_names`` has more entries than there are
	            probabilities (assuming ``softmax`` returns an indexable
	            sequence such as a NumPy array).
	    """
	    logits = outputs.logits.cpu().numpy()[0]
	    probabilities = softmax(logits)
	    return {
	        name: float(probabilities[index])
	        for index, name in enumerate(class_names)
	    }

	# Model 1: Swin V2 checkpoint with an explicit (fast) image processor,
	# wrapped in an image-classification pipeline and registered with the
	# 256x256 preprocessor.
	image_processor_1 = AutoImageProcessor.from_pretrained(
	    MODEL_PATHS["model_1"],
	    use_fast=True,
	)
	model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"]).to(device)
	clf_1 = pipeline(
	    task="image-classification",
	    model=model_1,
	    image_processor=image_processor_1,
	    device=device,
	)
	register_model("model_1", clf_1, preprocess_resize_256, postprocess_pipeline, CLASS_NAMES["model_1"])

	# Model 2: the pipeline resolves model and processor from the repo id;
	# registered with the 224x224 preprocessor.
	clf_2 = pipeline(
	    task="image-classification",
	    model=MODEL_PATHS["model_2"],
	    device=device,
	)
	register_model("model_2", clf_2, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_2"])

	# Model 3: run manually (feature extractor + forward pass), not through a
	# pipeline, so its raw logits are post-processed by the shared logits helper.
	# NOTE(review): `device=` is forwarded to the feature extractor's
	# from_pretrained exactly as before; confirm the extractor actually uses it.
	feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"], device=device)
	model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

	# These two helpers used to be verbatim copies of `preprocess_resize_256`
	# and `postprocess_logits`. The names are kept as aliases so existing
	# references (including the model_4 registration) keep working while the
	# logic lives in one place.
	preprocess_256 = preprocess_resize_256
	postprocess_logits_model3 = postprocess_logits


	def model3_infer(image):
	    """Run model 3 on one preprocessed image and return the raw model output.

	    The image is encoded to PyTorch tensors by ``feature_extractor_3``, moved
	    to ``device`` and passed through ``model_3`` with gradients disabled.
	    """
	    inputs = feature_extractor_3(image, return_tensors="pt").to(device)
	    with torch.no_grad():
	        outputs = model_3(**inputs)
	    return outputs


	register_model(
	    "model_3",
	    model3_infer,
	    preprocess_256,
	    postprocess_logits_model3,
	    CLASS_NAMES["model_3"],
	)

	# Model 4: same manual feature-extractor + forward-pass setup as model 3.
	# NOTE(review): `device=` is forwarded to the feature extractor's
	# from_pretrained exactly as before; confirm the extractor actually uses it.
	feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"], device=device)
	model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)


	def model4_infer(image):
	    """Run model 4 on one preprocessed image and return the raw model output.

	    The image is encoded to PyTorch tensors by ``feature_extractor_4``, moved
	    to ``device`` and passed through ``model_4`` with gradients disabled.
	    """
	    inputs = feature_extractor_4(image, return_tensors="pt").to(device)
	    with torch.no_grad():
	        outputs = model_4(**inputs)
	    return outputs


	# Previously a verbatim copy of `postprocess_logits`; kept as an alias so
	# the name still exists while the logic lives in one place.
	postprocess_logits_model4 = postprocess_logits

	register_model(
	    "model_4",
	    model4_infer,
	    preprocess_256,
	    postprocess_logits_model4,
	    CLASS_NAMES["model_4"],
	)

	# Model 5: pipeline built straight from the repo id, 224x224 preprocessor.
	clf_5 = pipeline(
	    task="image-classification",
	    model=MODEL_PATHS["model_5"],
	    device=device,
	)
	register_model("model_5", clf_5, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5"])

	# Model 5b: pipeline built straight from the repo id, 224x224 preprocessor.
	clf_5b = pipeline(
	    task="image-classification",
	    model=MODEL_PATHS["model_5b"],
	    device=device,
	)
	register_model("model_5b", clf_5b, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_5b"])

	# Model 6: Swin checkpoint with an explicit (fast) image processor, wrapped
	# in an image-classification pipeline; 224x224 preprocessor.
	image_processor_6 = AutoImageProcessor.from_pretrained(
	    MODEL_PATHS["model_6"],
	    use_fast=True,
	)
	model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
	clf_6 = pipeline(
	    task="image-classification",
	    model=model_6,
	    image_processor=image_processor_6,
	    device=device,
	)
	register_model("model_6", clf_6, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_6"])

	# Model 7: auto-resolved classification model with an explicit (fast) image
	# processor, wrapped in an image-classification pipeline; 224x224
	# preprocessor.
	image_processor_7 = AutoImageProcessor.from_pretrained(
	    MODEL_PATHS["model_7"],
	    use_fast=True,
	)
	model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
	clf_7 = pipeline(
	    task="image-classification",
	    model=model_7,
	    image_processor=image_processor_7,
	    device=device,
	)
	register_model("model_7", clf_7, preprocess_resize_224, postprocess_pipeline, CLASS_NAMES["model_7"])

	# Generic inference entry point shared by every registered model.

	def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
	    """Run one registered model on ``image`` and return its post-processed scores.

	    The registry entry for ``model_id`` supplies the preprocessing callable,
	    the model callable, the post-processing callable and the class names.

	    Args:
	        image: Input image handed to the entry's ``preprocess`` callable.
	        model_id: Key into ``MODEL_REGISTRY``.
	        confidence_threshold: Accepted for interface compatibility; it is
	            not applied to the result yet.

	    Returns:
	        Whatever the entry's ``postprocess`` callable returns, or
	        ``{"error": "<message>"}`` if the model call or post-processing
	        raises.

	    Note:
	        The registry lookup and the preprocessing step run outside the
	        ``try`` block, so their exceptions propagate to the caller.
	    """
	    entry = MODEL_REGISTRY[model_id]
	    img = entry.preprocess(image)
	    try:
	        raw_output = entry.model(img)
	        return entry.postprocess(raw_output, entry.class_names)
	    except Exception as e:
	        # Keep the best-effort contract (one failing model must not abort the
	        # others) but record the traceback rather than dropping it silently.
	        logger.exception("Inference failed for model %s", model_id)
	        return {"error": str(e)}

	# Fan a single image out to every registered model, in a fixed order.

	def predict_image(img, confidence_threshold):
	    """Run ``infer`` for each known model id and collect the results.

	    Returns a ``(img, results)`` tuple: ``img`` is the input passed back
	    unchanged and ``results`` is a list with one ``infer`` result per model,
	    in the order model_1, model_2, model_3, model_4, model_5, model_5b,
	    model_6, model_7.
	    """
	    model_ids = (
	        "model_1",
	        "model_2",
	        "model_3",
	        "model_4",
	        "model_5",
	        "model_5b",
	        "model_6",
	        "model_7",
	    )
	    results = []
	    for model_id in model_ids:
	        results.append(infer(img, model_id, confidence_threshold))
	    return img, results

	# UI callback: optional augmentation, model predictions and forensic views.

	def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
	    """Predict on ``img`` (optionally augmented) and build forensic images.

	    When ``augment_methods`` is non-empty the image is first passed through
	    ``augment_image`` with the given rotate/noise/sharpen settings; otherwise
	    it is used as-is. The resulting image goes to ``predict_image``.

	    Returns a 3-tuple ``(image, forensics_images, results)`` where
	    ``forensics_images`` is the list
	    ``[image, ela1, ela2, ela3, gradient, minmax]`` and ``results`` is the
	    list of per-model results from ``predict_image``.
	    """
	    if not augment_methods:
	        working_image = img
	    else:
	        working_image, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
	    working_image, results = predict_image(working_image, confidence_threshold)

	    # Array views: the (possibly augmented) image feeds gradient/min-max,
	    # while ELA always runs on the image exactly as it was passed in.
	    processed_array = np.array(working_image)
	    original_array = np.array(img)

	    gradient_image = gradient_processing(processed_array)
	    minmax_image = minmax_preprocess(processed_array)

	    # ELA pass 1: standard settings, grayscale.
	    ela_standard = ELA(original_array, quality=75, scale=50, contrast=20, linear=False, grayscale=True)

	    # ELA passes 2 and 3: higher scale/contrast, grayscale then colour.
	    ela_enhanced_gray = ELA(original_array, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
	    ela_enhanced_color = ELA(original_array, quality=75, scale=75, contrast=25, linear=False, grayscale=False)

	    forensics_images = [
	        working_image,
	        ela_standard,
	        ela_enhanced_gray,
	        ela_enhanced_color,
	        gradient_image,
	        minmax_image,
	    ]
	    return working_image, forensics_images, results

	# Build the Gradio Blocks UI (bound to `iface`) with a small custom CSS
	# override, then launch it at the bottom of this section.
	# NOTE(review): the nesting of the `with` layout blocks below cannot be
	# recovered from this view (indentation was lost); confirm against the
	# real file before moving any statement.
	with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as iface:
	with ms.Application() as app:
	with antd.ConfigProvider():
	# Welcome banner (icon, title, description).
	# NOTE(review): the description mentions "6 models", while predict_image
	# runs 8 model ids; the text may be stale.
	antdx.Welcome(
	icon=
	"https://cdn-avatars.huggingface.co/v1/production/uploads/639daf827270667011153fbc/WpeSFhuB81DY-1TjNUmV_.png",
	title="Welcome to Project OpenSight",
	description=
	"The OpenSight aims to be an open-source SOTA generated image detection model. This HF Space is not only an introduction but a educational playground for the public to evaluate and challenge current open source models. Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds. "
	)
	# Tab 1: upload an image, optionally augment it, and run the detectors.
	with gr.Tab("👀 Detection Models Eval / Playground"):
	gr.Markdown("# Open Source Detection Models Found on the Hub\n\n - Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds once we're back on CUDA.\n - The Community Forensics mother of all detection models is now available for inference, head to the middle tab above this.\n - Lots of exciting things coming up, stay tuned!")

	with gr.Row():
	with gr.Column(scale=1):
	# Image arrives in the callback as a PIL image (type='pil').
	image_input = gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil')
	with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
	augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
	# The three augmentation sliders start hidden; the change handlers
	# further down toggle their visibility.
	rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
	noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
	sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
	confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
	# Order matches the parameters of predict_image_with_json:
	# (img, confidence_threshold, augment_methods, rotate_degrees,
	#  noise_level, sharpen_strength).
	inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
	predict_button = gr.Button("Predict")
	augment_button = gr.Button("Augment & Predict")
	image_output = gr.Image(label="Processed Image", visible=False)


	with gr.Column(scale=2):
	# Use Gradio-native Dataframe to display results
	results_table = gr.Dataframe(label="Model Predictions", headers=None, datatype="auto")
	forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[4], rows=[2], container=False, height="auto", object_fit="contain", elem_id="post-gallery")

	# Order matches the 3-tuple returned by predict_image_with_json:
	# (image, forensics_images, results).
	outputs = [image_output, forensics_gallery, results_table]

	# Show/hide each augmentation slider depending on whether its method is
	# currently selected in the checkbox group.
	augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
	augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
	augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])

	# "Predict": uses the user's current augmentation selection.
	predict_button.click(
	fn=predict_image_with_json,
	inputs=inputs,
	outputs=outputs
	)
	# "Augment & Predict": same callback, but the augmentation methods come
	# from a separate hidden checkbox group with all three methods preselected,
	# not from the user's checkbox selection.
	augment_button.click( # Connect Augment button to the function
	fn=predict_image_with_json,
	inputs=[
	image_input,
	confidence_slider,
	gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False), # Default values
	rotate_slider,
	noise_slider,
	sharpen_slider
	],
	outputs=outputs
	)
	# Tab 2: loaded from another Space via gr.load.
	with gr.Tab("👑 Community Forensics Preview"):
	temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
	# preview # no idea if this will work
	# Tab 3: placeholder leaderboard heading.
	with gr.Tab("🥇 Leaderboard"):
	gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")


	# Launch the interface
	iface.launch()