feat: refactor prediction functions and enhance image loading capabilities for improved processing and noise estimation
Files changed:
- app.py (+38, −17)
- forensics/__init__.py (+2, −2)
- forensics/wavelet.py (+1, −1)
- utils/load.py (+51, −0)
app.py (CHANGED)

```diff
@@ -13,9 +13,10 @@ from utils.utils import softmax, augment_image
 from forensics.gradient import gradient_processing
 from forensics.minmax import minmax_process
 from forensics.ela import ELA
-from forensics.wavelet import
+from forensics.wavelet import noise_estimation
 from forensics.bitplane import bit_plane_extractor
 from utils.hf_logger import log_inference_data
+from utils.load import load_image
 from agents.ensemble_team import EnsembleMonitorAgent, WeightOptimizationAgent, SystemHealthAgent
 from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDetectionAgent
 from utils.registry import register_model, MODEL_REGISTRY, ModelEntry
@@ -191,9 +192,10 @@ def simple_prediction(img):
     img_byte_arr = io.BytesIO()
     img.save(img_byte_arr, format='PNG')  # Using PNG for lossless conversion, can be JPEG if preferred
     img_byte_arr.seek(0)  # Rewind to the beginning of the stream
+    im = load_image(img)

     result = client.predict(
-        input_image=handle_file(
+        input_image=handle_file(im),
         api_name="/simple_predict"
     )
     return result
@@ -247,15 +249,34 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
             "Label": f"Error: {str(e)}"
         }

-
-
-
+def full_prediction(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
+    """Full prediction run, with a team of ensembles and agents.
+
+    Args:
+        img (url: str, Image.Image, np.ndarray): The input image to classify.
+        confidence_threshold (float, optional): The confidence threshold for classification. Defaults to 0.75.
+        augment_methods (list, optional): The augmentation methods to use.
+        rotate_degrees (int, optional): The degrees to rotate the image.
+        noise_level (int, optional): The noise level to use.
+        sharpen_strength (int, optional): The sharpen strength to use.
+
+    Returns:
+        dict: A dictionary containing the model details, classification scores, and label.
+    """
+    # Ensure img is a PIL Image object
+    if img is None:
+        raise gr.Error("No image provided. Please upload an image to analyze.")
+
     if not isinstance(img, Image.Image):
         try:
             img = Image.fromarray(img)
         except Exception as e:
             logger.error(f"Error converting input image to PIL: {e}")
-            raise
+            raise gr.Error("Input image could not be converted to a valid image format. Please try another image.")
+
+    # Ensure image is in RGB format for consistent processing
+    if img.mode != 'RGB':
+        img = img.convert('RGB')

     monitor_agent = EnsembleMonitorAgent()
     weight_manager = ModelWeightManager(strongest_model_id="simple_prediction")
@@ -406,7 +427,7 @@ def ensemble_prediction_stream(img, confidence_threshold, augment_methods, rotat
     yield img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html

 detection_model_eval_playground = gr.Interface(
-    fn=
+    fn=full_prediction,
     inputs=[
         gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil'),
         gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold"),
@@ -426,8 +447,8 @@ detection_model_eval_playground = gr.Interface(
         gr.JSON(label="Raw Model Results", visible=False),
         gr.Markdown(label="Consensus", value="")
     ],
-    title="
-    description="
+    title="Multi-Model Ensemble + Agentic Coordinated Deepfake Detection",
+    description="The detection of AI-generated images has entered a critical inflection point. While existing solutions struggle with outdated datasets and inflated claims, our approach prioritizes agility, community collaboration, and an offensive approach to deepfake detection.",
     api_name="predict",
     live=True  # Enable streaming
 )
@@ -436,9 +457,9 @@ community_forensics_preview = gr.Interface(
     fn=lambda: gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces"),
     inputs=None,
     outputs=gr.HTML(),  # or gr.Markdown() if it's just text
-    title="
-    description="
-    api_name="
+    title="Quick and simple prediction by our strongest model.",
+    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
+    api_name="quick_predict"
 )

 leaderboard = gr.Interface(
@@ -453,13 +474,13 @@ simple_predict_interface = gr.Interface(
     fn=simple_prediction,
     inputs=gr.Image(type="filepath"),
     outputs=gr.Text(),
-    title="
-    description="",
+    title="Quick and simple prediction by our strongest model.",
+    description="No ensemble, no context, no agents, just a quick and simple prediction by our strongest model.",
     api_name="simple_predict"
 )

-
-    fn=
+noise_estimation_interface = gr.Interface(
+    fn=noise_estimation,
     inputs=[gr.Image(type="pil"), gr.Slider(1, 32, value=8, step=1, label="Block Size")],
     outputs=gr.Image(type="pil"),
     title="Wavelet-Based Noise Analysis",
@@ -529,7 +550,7 @@ demo = gr.TabbedInterface(
     [
         detection_model_eval_playground,
         simple_predict_interface,
-
+        noise_estimation_interface,
         bit_plane_interface,
         ela_interface,
         gradient_processing_interface,
```
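For reference, a minimal sketch of how an external caller could exercise the `simple_predict` endpoint that `simple_prediction` now backs, using the same `gradio_client` + `handle_file` pattern the function itself relies on. The Space id and file name below are placeholders, not part of this commit.

```python
# Illustrative only: the Space id and image path are hypothetical.
from gradio_client import Client, handle_file

client = Client("aiwithoutborders-xyz/your-space-id")  # hypothetical target Space
result = client.predict(
    handle_file("sample.jpg"),   # local path or URL wrapped for upload
    api_name="/simple_predict",
)
print(result)
```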
forensics/__init__.py (CHANGED)

```diff
@@ -3,7 +3,7 @@ from .ela import ELA
 # from .exif import exif_full_dump
 from .gradient import gradient_processing
 from .minmax import minmax_process
-from .wavelet import
+from .wavelet import noise_estimation

 __all__ = [
     'bit_plane_extractor',
@@ -11,5 +11,5 @@ __all__ = [
     # 'exif_full_dump',
     'gradient_processing',
     'minmax_process',
-    '
+    'noise_estimation'
 ]
```
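With `noise_estimation` re-exported here, the function can be imported from the package root (illustrative):

```python
# Resolves to forensics.wavelet.noise_estimation after this change.
from forensics import noise_estimation
```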
forensics/wavelet.py (CHANGED)

```diff
@@ -3,7 +3,7 @@ import pywt
 import cv2
 from PIL import Image

-def
+def noise_estimation(image: Image.Image, blocksize: int = 8) -> Image.Image:
     """Estimate local noise using wavelet blocking. Returns a PIL image of the noise map."""
     im = np.array(image.convert('L'))
     y = np.double(im)
```
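Given the new signature, a typical call looks like the sketch below; the file names are placeholders.

```python
# Usage sketch for the renamed, typed entry point; paths are placeholders.
from PIL import Image
from forensics.wavelet import noise_estimation

img = Image.open("photo.jpg")
noise_map = noise_estimation(img, blocksize=8)  # PIL image of the local-noise map
noise_map.save("noise_map.png")
```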
utils/load.py (ADDED)

```diff
@@ -0,0 +1,51 @@
+import os
+import tempfile
+from typing import Any, Callable, List, Optional, Tuple, Union
+from urllib.parse import unquote, urlparse
+
+import PIL.Image
+import PIL.ImageOps
+import requests
+
+def load_image(
+    image: Union[str, PIL.Image.Image], convert_method: Optional[Callable[[PIL.Image.Image], PIL.Image.Image]] = None
+) -> PIL.Image.Image:
+    """
+    Loads `image` to a PIL Image.
+
+    Args:
+        image (`str` or `PIL.Image.Image`):
+            The image to convert to the PIL Image format.
+        convert_method (Callable[[PIL.Image.Image], PIL.Image.Image], *optional*):
+            A conversion method to apply to the image after loading it. When set to `None` the image will be converted
+            "RGB".
+
+    Returns:
+        `PIL.Image.Image`:
+            A PIL Image.
+    """
+    if isinstance(image, str):
+        if image.startswith("http://") or image.startswith("https://"):
+            image = PIL.Image.open(requests.get(image, stream=True, timeout=600).raw)
+        elif os.path.isfile(image):
+            image = PIL.Image.open(image)
+        else:
+            raise ValueError(
+                f"Incorrect path or URL. URLs must start with `http://` or `https://`, and {image} is not a valid path."
+            )
+    elif isinstance(image, PIL.Image.Image):
+        image = image
+    else:
+        raise ValueError(
+            "Incorrect format used for the image. Should be a URL linking to an image, a local path, or a PIL image."
+        )
+
+    image = PIL.ImageOps.exif_transpose(image)
+
+    if convert_method is not None:
+        image = convert_method(image)
+    else:
+        image = image.convert("RGB")
+
+    return image
+
```
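`load_image` accepts an `http(s)` URL, a local file path, or an existing PIL image, applies EXIF orientation, and converts to RGB unless a custom `convert_method` is supplied. A short usage sketch (URLs and paths are placeholders):

```python
# Illustrative calls against the new helper; the inputs are placeholders.
from utils.load import load_image

img_from_url = load_image("https://example.com/sample.png")    # fetched with requests
img_from_disk = load_image("samples/sample.png")                # opened from a local path
img_grayscale = load_image("samples/sample.png", convert_method=lambda im: im.convert("L"))
```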