LPX committed on
Commit
22628b7
·
1 Parent(s): bcb108a

major(feat): add support for Gradio API model and enhance ONNX model handling

Browse files

- Introduced new model (model_8) using Gradio API for inference.
- Added preprocessing and postprocessing functions for Gradio API integration.
- Enhanced ONNX model handling with improved logging and error management.
- Updated softmax function to return Python floats for better compatibility.
- Added new model configuration files for model_8 and updated existing configurations.

app_optimized.py CHANGED
@@ -12,6 +12,8 @@ import json
12
  from huggingface_hub import CommitScheduler, hf_hub_download, snapshot_download
13
  from dotenv import load_dotenv
14
  import concurrent.futures
 
 
15
 
16
  from utils.utils import softmax, augment_image
17
  from forensics.gradient import gradient_processing
@@ -27,7 +29,6 @@ from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
27
  from agents.ensemble_weights import ModelWeightManager
28
  from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
29
  from torchvision import transforms
30
- import torch
31
 
32
  logging.basicConfig(level=logging.INFO)
33
  logger = logging.getLogger(__name__)
@@ -73,7 +74,8 @@ MODEL_PATHS = {
73
  "model_4": "cmckinle/sdxl-flux-detector_v1.1",
74
  "model_5": "LPX55/detection-model-5-ONNX",
75
  "model_6": "LPX55/detection-model-6-ONNX",
76
- "model_7": "LPX55/detection-model-7-ONNX"
 
77
  }
78
 
79
  CLASS_NAMES = {
@@ -84,6 +86,7 @@ CLASS_NAMES = {
84
  "model_5": ['Realism', 'Deepfake'],
85
  "model_6": ['ai_gen', 'human'],
86
  "model_7": ['Fake', 'Real'],
 
87
  }
88
 
89
  def preprocess_resize_256(image):
@@ -98,7 +101,7 @@ def preprocess_resize_224(image):
98
 
99
  def postprocess_pipeline(prediction, class_names):
100
  # Assumes HuggingFace pipeline output
101
- return {pred['label']: pred['score'] for pred in prediction}
102
 
103
  def postprocess_logits(outputs, class_names):
104
  # Assumes model output with logits
@@ -106,6 +109,83 @@ def postprocess_logits(outputs, class_names):
106
  probabilities = softmax(logits)
107
  return {class_names[i]: probabilities[i] for i in range(len(class_names))}
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path, architecture=None, dataset=None):
110
  entry = ModelEntry(model, preprocess, postprocess, class_names, display_name=display_name, contributor=contributor, model_path=model_path, architecture=architecture, dataset=dataset)
111
  MODEL_REGISTRY[model_id] = entry
@@ -157,12 +237,15 @@ def preprocess_onnx_input(image: Image.Image, preprocessor_config: dict):
157
  image = image.convert('RGB')
158
 
159
  # Get image size and normalization values from preprocessor_config or use defaults
160
- image_size = preprocessor_config.get('size', {'height': 224, 'width': 224})
 
 
161
  mean = preprocessor_config.get('image_mean', [0.485, 0.456, 0.406])
162
  std = preprocessor_config.get('image_std', [0.229, 0.224, 0.225])
163
 
164
  transform = transforms.Compose([
165
- transforms.Resize((image_size['height'], image_size['width'])),
 
166
  transforms.ToTensor(),
167
  transforms.Normalize(mean=mean, std=std),
168
  ])
@@ -170,16 +253,28 @@ def preprocess_onnx_input(image: Image.Image, preprocessor_config: dict):
170
  # ONNX expects numpy array with batch dimension (1, C, H, W)
171
  return input_tensor.unsqueeze(0).cpu().numpy()
172
 
173
- def infer_onnx_model(hf_model_id, preprocessed_image_np):
174
  try:
175
  ort_session, _, _ = get_onnx_model_from_cache(hf_model_id)
176
 
 
 
 
 
 
177
  ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
178
  ort_outputs = ort_session.run(None, ort_inputs)
179
 
180
- # Assuming the output is logits, apply softmax to get probabilities
181
  logits = ort_outputs[0]
182
- probabilities = softmax(logits[0]) # Remove batch dim, apply softmax
 
 
 
 
 
 
 
 
183
  return {"logits": logits, "probabilities": probabilities}
184
 
185
  except Exception as e:
@@ -188,15 +283,32 @@ def infer_onnx_model(hf_model_id, preprocessed_image_np):
188
  return {"logits": np.array([]), "probabilities": np.array([])}
189
 
190
  def postprocess_onnx_output(onnx_output, model_config):
191
- # Get class names from model_config, or use a default if not found
192
- class_names = model_config.get('id2label', {0: 'Fake', 1: 'Real'}) # Default to Fake/Real if not found
193
- class_names = [class_names[i] for i in sorted(class_names.keys())]
 
 
 
 
 
 
 
194
 
195
  probabilities = onnx_output.get("probabilities")
196
- if probabilities is not None and len(probabilities) == len(class_names):
197
- return {class_names[i]: probabilities[i] for i in range(len(class_names))}
 
 
 
 
 
 
 
 
 
 
198
  else:
199
- logger.warning("ONNX post-processing failed or class names mismatch.")
200
  return {name: 0.0 for name in class_names}
201
 
202
  # Register the ONNX quantized model
@@ -217,7 +329,8 @@ class ONNXModelWrapper:
217
 
218
  def __call__(self, image_np):
219
  self.load() # Ensure model is loaded on first call
220
- return infer_onnx_model(self.hf_model_id, image_np)
 
221
 
222
  def preprocess(self, image: Image.Image):
223
  self.load()
@@ -229,105 +342,133 @@ class ONNXModelWrapper:
229
 
230
  # Consolidate all model loading and registration
231
  for model_key, hf_model_path in MODEL_PATHS.items():
232
- display_name = model_key.replace("model_", "").upper()
 
233
  contributor = "Unknown"
234
  architecture = "Unknown"
235
  dataset = "TBA"
236
 
237
- # Attempt to derive contributor, architecture, dataset based on model_key
238
- if model_key == "model_1":
239
- contributor = "haywoodsloan"
240
- architecture = "SwinV2"
241
- dataset = "DeepFakeDetection"
242
- elif model_key == "model_2":
243
- contributor = "Heem2"
244
- architecture = "ViT"
245
- dataset = "DeepFakeDetection"
246
- elif model_key == "model_3":
247
- contributor = "Organika"
248
- architecture = "VIT"
249
- dataset = "SDXL"
250
- elif model_key == "model_4":
251
- contributor = "cmckinle"
252
- architecture = "VIT"
253
- dataset = "SDXL, FLUX"
254
- elif model_key == "model_5":
255
- contributor = "prithivMLmods"
256
- architecture = "VIT"
257
- elif model_key == "model_6":
258
- contributor = "ideepankarsharma2003"
259
- architecture = "SWINv1"
260
- dataset = "SDXL, Midjourney"
261
- elif model_key == "model_7":
262
- contributor = "date3k2"
263
- architecture = "VIT"
264
-
265
  current_class_names = CLASS_NAMES.get(model_key, [])
266
 
 
267
  if "ONNX" in hf_model_path:
 
268
  logger.info(f"Registering ONNX model: {model_key} from {hf_model_path}")
269
  onnx_wrapper_instance = ONNXModelWrapper(hf_model_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  register_model_with_metadata(
271
- model_key,
272
- onnx_wrapper_instance, # The callable wrapper for the ONNX model
273
- onnx_wrapper_instance.preprocess,
274
- onnx_wrapper_instance.postprocess,
275
- current_class_names, # Initial class names; will be overridden by model_config if available
276
- display_name=display_name + ("_ONNX" if "ONNX" not in display_name else ""),
277
  contributor=contributor,
278
  model_path=hf_model_path,
279
  architecture=architecture,
280
  dataset=dataset
281
  )
282
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  logger.info(f"Registering HuggingFace pipeline/AutoModel: {model_key} from {hf_model_path}")
284
- model_instance = None
285
- preprocess_func = None
286
- postprocess_func = postprocess_pipeline # Default for pipelines
287
- current_processor = None
288
-
289
- if model_key == "model_5":
290
- model_instance = pipeline("image-classification", model=hf_model_path, device=device)
291
- preprocess_func = preprocess_resize_224
292
- elif model_key in ["model_4"]:
293
- current_processor = AutoFeatureExtractor.from_pretrained(hf_model_path, device=device)
294
- model_instance = AutoModelForImageClassification.from_pretrained(hf_model_path).to(device)
295
-
296
- preprocess_func = preprocess_resize_256 # Using the shared preprocess_resize_256
297
- postprocess_func = postprocess_logits # Using the shared postprocess_logits
298
-
299
- def custom_infer(image, processor_local=current_processor, model_local=model_instance):
300
- inputs = processor_local(image, return_tensors="pt").to(device)
301
- with torch.no_grad():
302
- outputs = model_local(**inputs)
303
- return outputs
304
- model_instance = custom_infer
305
-
306
- elif model_key in ["model_6", "model_7"]:
307
- current_processor = AutoImageProcessor.from_pretrained(hf_model_path, use_fast=True)
308
- if model_key == "model_6":
309
- model_instance = SwinForImageClassification.from_pretrained(hf_model_path).to(device)
310
- else: # model_7
311
- model_instance = AutoModelForImageClassification.from_pretrained(hf_model_path).to(device)
312
-
313
- model_instance = pipeline(model=model_instance, task="image-classification", image_processor=current_processor, device=device)
314
- preprocess_func = preprocess_resize_224
315
-
316
- if model_instance and preprocess_func:
317
- register_model_with_metadata(
318
- model_id=model_key,
319
- model=model_instance,
320
- preprocess=preprocess_func,
321
- postprocess=postprocess_func,
322
- class_names=current_class_names,
323
- display_name=display_name,
324
- contributor=contributor,
325
- model_path=hf_model_path,
326
- architecture=architecture,
327
- dataset=dataset
328
- )
329
- else:
330
- logger.warning(f"Could not automatically load and register model: {model_key} from {hf_model_path}")
331
 
332
 
333
  def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
@@ -346,8 +487,14 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
346
  try:
347
  result = entry.model(img)
348
  scores = entry.postprocess(result, entry.class_names)
349
- ai_score = float(scores.get(entry.class_names[0], 0.0))
350
- real_score = float(scores.get(entry.class_names[1], 0.0))
 
 
 
 
 
 
351
  label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
352
  return {
353
  "Model": entry.display_name,
@@ -445,20 +592,30 @@ def full_prediction(img, confidence_threshold, rotate_degrees, noise_level, shar
445
  model_start = time.time()
446
  result = infer(img_pil, model_id, confidence_threshold)
447
  model_end = time.time()
 
 
 
 
 
 
 
 
 
 
448
  monitor_agent.monitor_prediction(
449
  model_id,
450
  result["Label"],
451
- max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)),
452
  model_end - model_start
453
  )
454
  model_predictions_raw[model_id] = result
455
- confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0))
456
  results.append(result)
457
  table_rows.append([
458
  result.get("Model", ""),
459
  result.get("Contributor", ""),
460
- round(result.get("AI Score", 0.0), 3) if result.get("AI Score") is not None else 0.0,
461
- round(result.get("Real Score", 0.0), 3) if result.get("Real Score") is not None else 0.0,
462
  result.get("Label", "Error")
463
  ])
464
  # Yield partial results: only update the table, others are None
@@ -817,7 +974,7 @@ demo = gr.TabbedInterface(
817
 
818
  )
819
  footerMD = """
820
- ### ⚠️ ENSEMBLE TEAM IN TRAINING ⚠️ \n\n
821
 
822
  1. **DISCLAIMER: METADATA AS WELL AS MEDIA SUBMITTED TO THIS SPACE MAY BE VIEWED AND SELECTED FOR FUTURE DATASETS, PLEASE DO NOT SUBMIT PERSONAL CONTENT. FOR UNTRACKED, PRIVATE USE OF THE MODELS YOU MAY STILL USE [THE ORIGINAL SPACE HERE](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Deepfake-Detection-Models-Playground), SOTA MODEL INCLUDED.**
823
  2. **UPDATE 6-13-25**: APOLOGIES FOR THE CONFUSION, WE ARE WORKING TO REVERT THE ORIGINAL REPO BACK TO ITS NON-DATA COLLECTION STATE -- ONLY THE "SIMPLE PREDICTION" ENDPOINT IS CURRENTLY 100% PRIVATE. PLEASE STAY TUNED AS WE FIGURE OUT A SOLUTION FOR THE ENSEMBLE + AGENT TEAM ENDPOINT. IT CAN GET RESOURCE INTENSIVE TO RUN A FULL PREDICTION. ALTERNATIVELY, WE **ENCOURAGE** ANYONE TO FORK AND CONTRIBUTE TO THE PROJECT.
 
12
  from huggingface_hub import CommitScheduler, hf_hub_download, snapshot_download
13
  from dotenv import load_dotenv
14
  import concurrent.futures
15
+ import ast
16
+ import torch
17
 
18
  from utils.utils import softmax, augment_image
19
  from forensics.gradient import gradient_processing
 
29
  from agents.ensemble_weights import ModelWeightManager
30
  from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
31
  from torchvision import transforms
 
32
 
33
  logging.basicConfig(level=logging.INFO)
34
  logger = logging.getLogger(__name__)
 
74
  "model_4": "cmckinle/sdxl-flux-detector_v1.1",
75
  "model_5": "LPX55/detection-model-5-ONNX",
76
  "model_6": "LPX55/detection-model-6-ONNX",
77
+ "model_7": "LPX55/detection-model-7-ONNX",
78
+ "model_8": "aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT"
79
  }
80
 
81
  CLASS_NAMES = {
 
86
  "model_5": ['Realism', 'Deepfake'],
87
  "model_6": ['ai_gen', 'human'],
88
  "model_7": ['Fake', 'Real'],
89
+ "model_8": ['Fake', 'Real'],
90
  }
91
 
92
  def preprocess_resize_256(image):
 
101
 
102
  def postprocess_pipeline(prediction, class_names):
103
  # Assumes HuggingFace pipeline output
104
+ return {pred['label']: float(pred['score']) for pred in prediction}
105
 
106
  def postprocess_logits(outputs, class_names):
107
  # Assumes model output with logits
 
109
  probabilities = softmax(logits)
110
  return {class_names[i]: probabilities[i] for i in range(len(class_names))}
111
 
112
def postprocess_binary_output(output, class_names):
    """Turn a binary model output into ``{label: probability}`` scores.

    Args:
        output: Either a dict carrying the scores under ``"probabilities"`` or
            the scores themselves (NumPy array, list or tuple). One value is
            read as the probability of ``class_names[0]``; with two values the
            first one is used.
        class_names: Two labels. Index 0 receives the extracted probability and
            index 1 receives its complement.

    Returns:
        dict mapping both labels to Python floats that sum to 1. If the output
        cannot be interpreted, ``{class_names[0]: 0.0, class_names[1]: 1.0}``
        is returned.
    """
    fallback = {class_names[0]: 0.0, class_names[1]: 1.0}

    if isinstance(output, dict) and "probabilities" in output:
        raw = output["probabilities"]
    elif isinstance(output, (np.ndarray, list, tuple)):
        raw = output
    else:
        logger.warning(f"Unexpected output type for binary post-processing: {type(output)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return fallback

    # Check for None BEFORE touching array attributes: reading `.shape` on None
    # used to raise AttributeError ahead of this guard.
    if raw is None:
        logger.warning("Probabilities array is None after extracting from output. Returning default scores.")
        return fallback

    # Accept plain lists of floats as well as arrays.
    try:
        probabilities_array = np.asarray(raw, dtype=float)
    except (TypeError, ValueError):
        logger.warning(f"Unexpected output type for binary post-processing: {type(raw)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return fallback

    logger.info(f"Debug: Probabilities array entering postprocess_binary_output: {probabilities_array}, type: {type(raw)}, shape: {probabilities_array.shape}")

    if probabilities_array.size == 1:
        fake_prob = float(probabilities_array.item())
    elif probabilities_array.size == 2:
        # ravel() so that a batched (1, 2) array works as well as a flat (2,) one.
        fake_prob = float(probabilities_array.ravel()[0])
    else:
        logger.warning(f"Unexpected probabilities array shape for binary post-processing: {probabilities_array.shape}. Expected size 1 or 2.")
        return fallback

    real_prob = 1.0 - fake_prob  # Ensure Fake and Real sum to 1
    return {class_names[0]: fake_prob, class_names[1]: real_prob}
139
+
140
+ # New function to infer using Gradio API for model_8
141
def infer_gradio_api(image_path):
    """Run model_8 remotely through the Gradio Space and return its fake probability.

    Args:
        image_path: Path of the image file to upload to the ``/simple_predict``
            endpoint.

    Returns:
        dict ``{"probabilities": np.array([fake_probability])}``. If the reply
        cannot be parsed, the array holds ``0.0``.
    """
    # NOTE(review): `Client` and `handle_file` are presumably imported from
    # `gradio_client` elsewhere in this file -- the import is not visible here.
    client = Client("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview")
    result_str = client.predict(
        input_image=handle_file(image_path),
        api_name="/simple_predict"
    )
    logger.info(f"Debug: Raw result_str from Gradio API (model_8): {result_str}, type: {type(result_str)}")
    try:
        if isinstance(result_str, dict):
            # The client may hand back an already-decoded mapping;
            # literal_eval() only accepts strings/AST nodes and would reject it.
            result_dict = result_str
        else:
            # Safely evaluate the string as a Python literal
            result_dict = ast.literal_eval(result_str)
        fake_probability = float(result_dict.get('Fake Probability', 0.0))
        logger.info(f"Debug: Parsed result_dict: {result_dict}, Extracted fake_probability: {fake_probability}")
        return {"probabilities": np.array([fake_probability])}  # Return as a numpy array with one element
    except (ValueError, SyntaxError, TypeError, AttributeError, MemoryError, RecursionError) as e:
        # NOTE(review): 0.0 here is indistinguishable from a confident "Real"
        # verdict downstream; kept for backward compatibility.
        logger.error(f"Error parsing Gradio API output: {e}. Raw output: {result_str}")
        return {"probabilities": np.array([0.0])}
157
+
158
+ # New preprocess function for Gradio API
159
def preprocess_gradio_api(image: Image.Image):
    """Write *image* to a PNG file and return its path for the Gradio client.

    The Gradio API expects a file path, so the PIL image is saved to disk. The
    file name is unique per process and thread, so concurrent requests no
    longer overwrite one shared ``temp_gradio_input.png``. Each worker reuses
    its own file, which keeps the number of files on disk bounded.

    Args:
        image: PIL image to upload.

    Returns:
        str path of the written PNG file.
    """
    import os
    import tempfile
    import threading

    # PNG cannot encode every PIL mode (e.g. CMYK); convert those to RGB first.
    if image.mode not in {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}:
        image = image.convert("RGB")

    # NOTE(review): assumes preprocess and the model call run on the same
    # thread for a given request -- confirm against infer().
    file_name = f"temp_gradio_input_{os.getpid()}_{threading.get_ident()}.png"
    temp_file_path = os.path.join(tempfile.gettempdir(), file_name)
    image.save(temp_file_path, format="PNG")
    return temp_file_path
164
+
165
+ # New postprocess function for Gradio API (adapting postprocess_binary_output)
166
def postprocess_gradio_api(gradio_output, class_names):
    """Convert the Gradio API result into ``{label: probability}`` scores.

    Args:
        gradio_output: Expected to be ``{"probabilities": np.array([fake_prob])}``;
            a bare NumPy array is accepted as well.
        class_names: Two labels. Index 0 receives the fake probability and
            index 1 receives its complement.

    Returns:
        dict mapping both labels to Python floats. If the output is missing,
        empty or does not hold exactly one value,
        ``{class_names[0]: 0.0, class_names[1]: 1.0}`` is returned.
    """
    fallback = {class_names[0]: 0.0, class_names[1]: 1.0}

    if isinstance(gradio_output, dict) and "probabilities" in gradio_output:
        raw = gradio_output["probabilities"]
    elif isinstance(gradio_output, np.ndarray):
        raw = gradio_output
    else:
        logger.warning(f"Unexpected output type for Gradio API post-processing: {type(gradio_output)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return fallback

    # Check for None BEFORE reading `.shape`; the debug log below used to raise
    # AttributeError on a None payload ahead of this guard.
    if raw is None:
        logger.warning("Probabilities array is None or empty after extracting from Gradio API output. Returning default scores.")
        return fallback

    try:
        probabilities_array = np.asarray(raw, dtype=float)
    except (TypeError, ValueError):
        logger.warning(f"Unexpected output type for Gradio API post-processing: {type(raw)}. Expected dict with 'probabilities' or numpy.ndarray.")
        return fallback

    logger.info(f"Debug: Probabilities array entering postprocess_gradio_api: {probabilities_array}, type: {type(raw)}, shape: {probabilities_array.shape}")

    if probabilities_array.size == 0:
        logger.warning("Probabilities array is None or empty after extracting from Gradio API output. Returning default scores.")
        return fallback

    # ndarray.item() raises ValueError unless the array holds exactly one
    # element, so reject anything else explicitly.
    if probabilities_array.size != 1:
        logger.warning(f"Unexpected probabilities array shape for Gradio API post-processing: {probabilities_array.shape}. Expected a single element.")
        return fallback

    fake_prob = float(probabilities_array.item())
    real_prob = 1.0 - fake_prob

    return {class_names[0]: fake_prob, class_names[1]: real_prob}
188
+
189
  def register_model_with_metadata(model_id, model, preprocess, postprocess, class_names, display_name, contributor, model_path, architecture=None, dataset=None):
190
  entry = ModelEntry(model, preprocess, postprocess, class_names, display_name=display_name, contributor=contributor, model_path=model_path, architecture=architecture, dataset=dataset)
191
  MODEL_REGISTRY[model_id] = entry
 
237
  image = image.convert('RGB')
238
 
239
  # Get image size and normalization values from preprocessor_config or use defaults
240
+ # Use 'size' for initial resize and 'crop_size' for center cropping
241
+ initial_resize_size = preprocessor_config.get('size', {'height': 224, 'width': 224})
242
+ crop_size = preprocessor_config.get('crop_size', initial_resize_size['height'])
243
  mean = preprocessor_config.get('image_mean', [0.485, 0.456, 0.406])
244
  std = preprocessor_config.get('image_std', [0.229, 0.224, 0.225])
245
 
246
  transform = transforms.Compose([
247
+ transforms.Resize((initial_resize_size['height'], initial_resize_size['width'])),
248
+ transforms.CenterCrop(crop_size), # Apply center crop
249
  transforms.ToTensor(),
250
  transforms.Normalize(mean=mean, std=std),
251
  ])
 
253
  # ONNX expects numpy array with batch dimension (1, C, H, W)
254
  return input_tensor.unsqueeze(0).cpu().numpy()
255
 
256
+ def infer_onnx_model(hf_model_id, preprocessed_image_np, model_config: dict):
257
  try:
258
  ort_session, _, _ = get_onnx_model_from_cache(hf_model_id)
259
 
260
+ # Debug: Print expected input shape from ONNX model
261
+ for input_meta in ort_session.get_inputs():
262
+ logger.info(f"Debug: ONNX model expected input name: {input_meta.name}, shape: {input_meta.shape}, type: {input_meta.type}")
263
+
264
+ logger.info(f"Debug: preprocessed_image_np shape: {preprocessed_image_np.shape}")
265
  ort_inputs = {ort_session.get_inputs()[0].name: preprocessed_image_np}
266
  ort_outputs = ort_session.run(None, ort_inputs)
267
 
 
268
  logits = ort_outputs[0]
269
+ logger.info(f"Debug: logits type: {type(logits)}, shape: {logits.shape}")
270
+ # If the model outputs a single logit (e.g., shape (1,)), use .item() to convert to scalar
271
+ # Otherwise, assume it's a batch of logits (e.g., shape (1, num_classes)) and take the first element (batch dim)
272
+ # The num_classes in config.json can be misleading; rely on actual output shape.
273
+
274
+ # Apply softmax to the logits to get probabilities for the classes
275
+ # The softmax function in utils/utils.py now ensures a list of floats
276
+ probabilities = softmax(logits[0]) # Assuming logits[0] is the relevant output for a single prediction
277
+
278
  return {"logits": logits, "probabilities": probabilities}
279
 
280
  except Exception as e:
 
283
  return {"logits": np.array([]), "probabilities": np.array([])}
284
 
285
def postprocess_onnx_output(onnx_output, model_config):
    """Pair ONNX probabilities with class labels taken from the model config.

    Args:
        onnx_output: dict that should contain the scores under
            ``"probabilities"``.
        model_config: Model config dict. ``id2label`` supplies the labels,
            ordered by class id; without it the labels default to
            ``['Fake', 'Real']``.

    Returns:
        dict ``{label: float probability}``. Every label maps to ``0.0`` when
        the probabilities are missing or their count does not match the labels.
    """
    def _class_id_order(label_id):
        # JSON configs store ids as strings; sort them numerically so that
        # "10" does not land before "2". Non-numeric ids sort last, by text.
        try:
            return (0, int(label_id), "")
        except (TypeError, ValueError):
            return (1, 0, str(label_id))

    # Get class names from model_config, defaulting to Fake/Real
    id2label = model_config.get('id2label')
    if id2label:
        class_names = [id2label[key] for key in sorted(id2label, key=_class_id_order)]
    else:
        class_names = ['Fake', 'Real']

    probabilities = onnx_output.get("probabilities")
    if probabilities is None:
        logger.warning("ONNX post-processing failed: 'probabilities' key not found in output.")
        return {name: 0.0 for name in class_names}

    # Configs with num_classes == 1 still yield two probabilities; they are
    # paired with the first two labels.
    single_output_model = (
        model_config.get('num_classes') == 1
        and len(probabilities) == 2
        and len(class_names) >= 2
    )
    if single_output_model or len(probabilities) == len(class_names):
        return {name: float(prob) for name, prob in zip(class_names, probabilities)}

    logger.warning("ONNX post-processing: Probabilities length mismatch with class names.")
    return {name: 0.0 for name in class_names}
313
 
314
  # Register the ONNX quantized model
 
329
 
330
  def __call__(self, image_np):
331
  self.load() # Ensure model is loaded on first call
332
+ # Pass model_config to infer_onnx_model
333
+ return infer_onnx_model(self.hf_model_id, image_np, self._model_config)
334
 
335
  def preprocess(self, image: Image.Image):
336
  self.load()
 
342
 
343
  # Consolidate all model loading and registration
344
  for model_key, hf_model_path in MODEL_PATHS.items():
345
+ logger.debug(f"Attempting to register model: {model_key} with path: {hf_model_path}")
346
+ model_num = model_key.replace("model_", "").upper()
347
  contributor = "Unknown"
348
  architecture = "Unknown"
349
  dataset = "TBA"
350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
  current_class_names = CLASS_NAMES.get(model_key, [])
352
 
353
+ # Logic for ONNX models (1, 2, 3, 5, 6, 7)
354
  if "ONNX" in hf_model_path:
355
+ logger.debug(f"Model {model_key} identified as ONNX.")
356
  logger.info(f"Registering ONNX model: {model_key} from {hf_model_path}")
357
  onnx_wrapper_instance = ONNXModelWrapper(hf_model_path)
358
+
359
+ # Attempt to derive contributor, architecture, dataset based on model_key
360
+ if model_key == "model_1":
361
+ contributor = "haywoodsloan"
362
+ architecture = "SwinV2"
363
+ dataset = "DeepFakeDetection"
364
+ elif model_key == "model_2":
365
+ contributor = "Heem2"
366
+ architecture = "ViT"
367
+ dataset = "DeepFakeDetection"
368
+ elif model_key == "model_3":
369
+ contributor = "Organika"
370
+ architecture = "VIT"
371
+ dataset = "SDXL"
372
+ elif model_key == "model_5":
373
+ contributor = "prithivMLmods"
374
+ architecture = "VIT"
375
+ elif model_key == "model_6":
376
+ contributor = "ideepankarsharma2003"
377
+ architecture = "SWINv1"
378
+ dataset = "SDXL, Midjourney"
379
+ elif model_key == "model_7":
380
+ contributor = "date3k2"
381
+ architecture = "VIT"
382
+
383
+ display_name_parts = [model_num]
384
+ if architecture and architecture not in ["Unknown"]:
385
+ display_name_parts.append(architecture)
386
+ if dataset and dataset not in ["TBA"]:
387
+ display_name_parts.append(dataset)
388
+ display_name = "-".join(display_name_parts)
389
+ display_name += "_ONNX" # Always append _ONNX for ONNX models
390
+
391
  register_model_with_metadata(
392
+ model_id=model_key,
393
+ model=onnx_wrapper_instance, # The callable wrapper for the ONNX model
394
+ preprocess=onnx_wrapper_instance.preprocess,
395
+ postprocess=onnx_wrapper_instance.postprocess,
396
+ class_names=current_class_names, # Initial class names; will be overridden by model_config if available
397
+ display_name=display_name,
398
  contributor=contributor,
399
  model_path=hf_model_path,
400
  architecture=architecture,
401
  dataset=dataset
402
  )
403
+ # Logic for Gradio API model (model_8)
404
+ elif model_key == "model_8":
405
+ logger.debug(f"Model {model_key} identified as Gradio API.")
406
+ logger.info(f"Registering Gradio API model: {model_key} from {hf_model_path}")
407
+ contributor = "aiwithoutborders-xyz"
408
+ architecture = "ViT"
409
+ dataset = "DeepfakeDetection"
410
+
411
+ display_name_parts = [model_num]
412
+ if architecture and architecture not in ["Unknown"]:
413
+ display_name_parts.append(architecture)
414
+ if dataset and dataset not in ["TBA"]:
415
+ display_name_parts.append(dataset)
416
+ display_name = "-".join(display_name_parts)
417
+
418
+ register_model_with_metadata(
419
+ model_id=model_key,
420
+ model=infer_gradio_api,
421
+ preprocess=preprocess_gradio_api,
422
+ postprocess=postprocess_gradio_api,
423
+ class_names=current_class_names,
424
+ display_name=display_name,
425
+ contributor=contributor,
426
+ model_path=hf_model_path,
427
+ architecture=architecture,
428
+ dataset=dataset
429
+ )
430
+ # Logic for PyTorch/Hugging Face pipeline models (currently only model_4)
431
+ elif model_key == "model_4": # Explicitly handle model_4
432
+ logger.debug(f"Model {model_key} identified as PyTorch/HuggingFace pipeline.")
433
  logger.info(f"Registering HuggingFace pipeline/AutoModel: {model_key} from {hf_model_path}")
434
+ contributor = "cmckinle"
435
+ architecture = "VIT"
436
+ dataset = "SDXL, FLUX"
437
+
438
+ display_name_parts = [model_num]
439
+ if architecture and architecture not in ["Unknown"]:
440
+ display_name_parts.append(architecture)
441
+ if dataset and dataset not in ["TBA"]:
442
+ display_name_parts.append(dataset)
443
+ display_name = "-".join(display_name_parts)
444
+
445
+ current_processor = AutoFeatureExtractor.from_pretrained(hf_model_path, device=device)
446
+ model_instance = AutoModelForImageClassification.from_pretrained(hf_model_path).to(device)
447
+
448
+ preprocess_func = preprocess_resize_256
449
+ postprocess_func = postprocess_logits
450
+
451
+ def custom_infer(image, processor_local=current_processor, model_local=model_instance):
452
+ inputs = processor_local(image, return_tensors="pt").to(device)
453
+ with torch.no_grad():
454
+ outputs = model_local(**inputs)
455
+ return outputs
456
+ model_instance = custom_infer
457
+
458
+ register_model_with_metadata(
459
+ model_id=model_key,
460
+ model=model_instance,
461
+ preprocess=preprocess_func,
462
+ postprocess=postprocess_func,
463
+ class_names=current_class_names,
464
+ display_name=display_name,
465
+ contributor=contributor,
466
+ model_path=hf_model_path,
467
+ architecture=architecture,
468
+ dataset=dataset
469
+ )
470
+ else: # Fallback for any unhandled models (shouldn't happen if MODEL_PATHS is fully covered)
471
+ logger.warning(f"Could not automatically load and register model: {model_key} from {hf_model_path}. No matching registration logic found.")
 
 
 
 
 
 
 
 
 
472
 
473
 
474
  def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75) -> dict:
 
487
  try:
488
  result = entry.model(img)
489
  scores = entry.postprocess(result, entry.class_names)
490
+
491
+ def _to_float_scalar(value):
492
+ if isinstance(value, np.ndarray):
493
+ return float(value.item()) # Convert numpy array scalar to Python float
494
+ return float(value) # Already a Python scalar or convertible type
495
+
496
+ ai_score = _to_float_scalar(scores.get(entry.class_names[0], 0.0))
497
+ real_score = _to_float_scalar(scores.get(entry.class_names[1], 0.0))
498
  label = "AI" if ai_score >= confidence_threshold else ("REAL" if real_score >= confidence_threshold else "UNCERTAIN")
499
  return {
500
  "Model": entry.display_name,
 
592
  model_start = time.time()
593
  result = infer(img_pil, model_id, confidence_threshold)
594
  model_end = time.time()
595
+
596
+ # Helper to ensure values are Python floats, handling numpy scalars
597
+ def _ensure_float_scalar(value):
598
+ if isinstance(value, np.ndarray):
599
+ return float(value.item()) # Convert numpy array scalar to Python float
600
+ return float(value) # Already a Python scalar or convertible type
601
+
602
+ ai_score_val = _ensure_float_scalar(result.get("AI Score", 0.0))
603
+ real_score_val = _ensure_float_val = _ensure_float_scalar(result.get("Real Score", 0.0))
604
+
605
  monitor_agent.monitor_prediction(
606
  model_id,
607
  result["Label"],
608
+ max(ai_score_val, real_score_val),
609
  model_end - model_start
610
  )
611
  model_predictions_raw[model_id] = result
612
+ confidence_scores[model_id] = max(ai_score_val, real_score_val)
613
  results.append(result)
614
  table_rows.append([
615
  result.get("Model", ""),
616
  result.get("Contributor", ""),
617
+ round(ai_score_val, 5),
618
+ round(real_score_val, 5),
619
  result.get("Label", "Error")
620
  ])
621
  # Yield partial results: only update the table, others are None
 
974
 
975
  )
976
  footerMD = """
977
+ ## ⚠️ ENSEMBLE TEAM IN TRAINING ⚠️ \n\n
978
 
979
  1. **DISCLAIMER: METADATA AS WELL AS MEDIA SUBMITTED TO THIS SPACE MAY BE VIEWED AND SELECTED FOR FUTURE DATASETS, PLEASE DO NOT SUBMIT PERSONAL CONTENT. FOR UNTRACKED, PRIVATE USE OF THE MODELS YOU MAY STILL USE [THE ORIGINAL SPACE HERE](https://huggingface.co/spaces/aiwithoutborders-xyz/OpenSight-Deepfake-Detection-Models-Playground), SOTA MODEL INCLUDED.**
980
  2. **UPDATE 6-13-25**: APOLOGIES FOR THE CONFUSION, WE ARE WORKING TO REVERT THE ORIGINAL REPO BACK TO ITS NON-DATA COLLECTION STATE -- ONLY THE "SIMPLE PREDICTION" ENDPOINT IS CURRENTLY 100% PRIVATE. PLEASE STAY TUNED AS WE FIGURE OUT A SOLUTION FOR THE ENSEMBLE + AGENT TEAM ENDPOINT. IT CAN GET RESOURCE INTENSIVE TO RUN A FULL PREDICTION. ALTERNATIVELY, WE **ENCOURAGE** ANYONE TO FORK AND CONTRIBUTE TO THE PROJECT.
temp_model_config/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_attn_implementation_autoset": true,
3
+ "_name_or_path": "aiwithoutborders-xyz/CommunityForensics-DeepfakeDet-ViT",
4
+ "architectures": [
5
+ "ViTForImageClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0.0,
8
+ "encoder_stride": 16,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.0,
11
+ "hidden_size": 384,
12
+ "image_size": 384,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-06,
16
+ "mlp_ratio": 4,
17
+ "model_type": "vit",
18
+ "num_attention_heads": 12,
19
+ "num_channels": 3,
20
+ "num_classes": 1,
21
+ "num_heads": 6,
22
+ "num_hidden_layers": 12,
23
+ "num_layers": 12,
24
+ "patch_size": 16,
25
+ "qkv_bias": true,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.49.0"
28
+ }
temp_original_vit_config/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ViTForImageClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.0,
6
+ "encoder_stride": 16,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.0,
9
+ "hidden_size": 384,
10
+ "image_size": 384,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-06,
14
+ "mlp_ratio": 4,
15
+ "model_type": "vit",
16
+ "num_attention_heads": 12,
17
+ "num_channels": 3,
18
+ "num_classes": 1,
19
+ "num_heads": 6,
20
+ "num_hidden_layers": 12,
21
+ "num_layers": 12,
22
+ "patch_size": 16,
23
+ "qkv_bias": true,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.50.0.dev0"
26
+ }
utils/utils.py CHANGED
@@ -5,7 +5,8 @@ from torchvision import transforms
5
 
6
  def softmax(vector):
7
  e = np.exp(vector - np.max(vector)) # for numerical stability
8
- return e / e.sum()
 
9
 
10
  def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
11
  for method in methods:
 
5
 
6
def softmax(vector):
    """Return the softmax of *vector* as a flat list of Python floats.

    The input is flattened first, so a batched ``(1, num_classes)`` logits
    array gives the same result as its single row. Normalisation is over all
    elements.

    Args:
        vector: Logits as a NumPy array or any array-like of numbers.

    Returns:
        list[float] of probabilities summing to 1, or ``[]`` for empty input.
    """
    logits = np.asarray(vector, dtype=float).ravel()
    if logits.size == 0:
        # np.max() raises on an empty array; there is nothing to normalise.
        return []
    e = np.exp(logits - np.max(logits))  # for numerical stability
    probabilities = e / e.sum()
    # tolist() converts NumPy scalars to built-in floats.
    return probabilities.tolist()
10
 
11
  def augment_image(img_pil, methods, rotate_degrees=0, noise_level=0, sharpen_strength=1):
12
  for method in methods: