mcp-deepfake-forensics

Running

App Files Community

LPX55 committed 10 days ago

Commit

c56a0f7

1 Parent(s): be96dd0

major(feat): implement streaming ensemble prediction to enhance real-time model inference and update interface for live results

Browse files

Files changed (1) hide show

app.py +22 -62

app.py CHANGED Viewed

@@ -240,23 +240,9 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
             "Label": f"Error: {str(e)}"
         }
-def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
-    """Full ensemble prediction pipeline.
-    Args:
-        img (Image.Image): The input image to classify.
-        confidence_threshold (float): The confidence threshold for classification.
-        augment_methods (list): The augmentation methods to apply to the image.
-        rotate_degrees (int): The degrees to rotate the image.
-        noise_level (int): The noise level to add to the image.
-        sharpen_strength (int): The strength of the sharpening to apply to the image.
-    Raises:
-        ValueError: If the input image could not be converted to a PIL Image.
-    Returns:
-        tuple: A tuple containing the processed image, forensic images, model predictions, raw model results, and consensus.
-    """
     if not isinstance(img, Image.Image):
         try:
             img = Image.fromarray(img)
@@ -270,35 +256,44 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
     health_agent = SystemHealthAgent()
     context_agent = ContextualIntelligenceAgent()
     anomaly_agent = ForensicAnomalyDetectionAgent()
     health_agent.monitor_system_health()
     if augment_methods:
         img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
     else:
         img_pil = img
-    img_np_og = np.array(img)  # Convert PIL Image to NumPy array
     model_predictions_raw = {}
     confidence_scores = {}
     results = []
     for model_id in MODEL_REGISTRY:
         model_start = time.time()
         result = infer(img_pil, model_id, confidence_threshold)
         model_end = time.time()
         monitor_agent.monitor_prediction(
             model_id,
             result["Label"],
             max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)),
             model_end - model_start
         )
         model_predictions_raw[model_id] = result
         confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0))
         results.append(result)
     image_data_for_context = {
         "width": img.width,
         "height": img.height,
@@ -306,43 +301,29 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
     }
     detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw)
     logger.info(f"Detected context tags: {detected_context_tags}")
     adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags)
-    weighted_predictions = {
-        "AI": 0.0,
-        "REAL": 0.0,
-        "UNCERTAIN": 0.0
-    }
     for model_id, prediction in model_predictions_raw.items():
         prediction_label = prediction.get("Label")
         if prediction_label in weighted_predictions:
             weighted_predictions[prediction_label] += adjusted_weights[model_id]
         else:
             logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.")
     final_prediction_label = "UNCERTAIN"
     if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]:
         final_prediction_label = "AI"
     elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]:
         final_prediction_label = "REAL"
     optimization_agent.analyze_performance(final_prediction_label, None)
     gradient_image = gradient_processing(img_np_og)
     gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True)
     minmax_image = minmax_process(img_np_og)
     minmax_image2 = minmax_process(img_np_og, radius=6)
     bitplane_image = bit_plane_extractor(img_pil)
     ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
     ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
     ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
     forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2, bitplane_image]
     forensic_output_descriptions = [
         f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}",
         "ELA analysis (Pass 1): Grayscale error map, quality 75.",
@@ -356,21 +337,7 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
     ]
     anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions)
     logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}")
-    table_rows = [[
-        r.get("Model", ""),
-        r.get("Contributor", ""),
-        round(r.get("AI Score", 0.0), 3) if r.get("AI Score") is not None else 0.0,
-        round(r.get("Real Score", 0.0), 3) if r.get("Real Score") is not None else 0.0,
-        r.get("Label", "Error")
-    ] for r in results]
-    logger.info(f"Type of table_rows: {type(table_rows)}")
-    for i, row in enumerate(table_rows):
-        logger.info(f"Row {i} types: {[type(item) for item in row]}")
     consensus_html = f"<b><span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b>"
     inference_params = {
         "confidence_threshold": confidence_threshold,
         "augment_methods": augment_methods,
@@ -379,13 +346,11 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
         "sharpen_strength": sharpen_strength,
         "detected_context_tags": detected_context_tags
     }
     ensemble_output_data = {
         "final_prediction_label": final_prediction_label,
         "weighted_predictions": weighted_predictions,
         "adjusted_weights": adjusted_weights
     }
     agent_monitoring_data_log = {
         "ensemble_monitor": {
             "alerts": monitor_agent.alerts,
@@ -403,7 +368,6 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
         },
         "forensic_anomaly_detection": anomaly_detection_results
     }
     log_inference_data(
         original_image=img,
         inference_params=inference_params,
@@ -413,7 +377,6 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
         agent_monitoring_data=agent_monitoring_data_log,
         human_feedback=None
     )
     cleaned_forensics_images = []
     for f_img in forensics_images:
         if isinstance(f_img, Image.Image):
@@ -425,22 +388,18 @@ def ensemble_prediction(img, confidence_threshold, augment_methods, rotate_degre
                 logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
         else:
             logger.warning(f"Unexpected type in forensic_images: {type(f_img)}. Skipping.")
     logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
     for i, res_dict in enumerate(results):
         for key in ["AI Score", "Real Score"]:
             value = res_dict.get(key)
             if isinstance(value, np.float32):
                 res_dict[key] = float(value)
                 logger.info(f"Converted {key} for result {i} from numpy.float32 to float.")
     json_results = json.dumps(results, cls=NumpyEncoder)
-    return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
 detection_model_eval_playground = gr.Interface(
-    fn=ensemble_prediction,
     inputs=[
         gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil'),
         gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold"),
@@ -462,7 +421,8 @@ detection_model_eval_playground = gr.Interface(
     ],
     title="Open Source Detection Models Found on the Hub",
     description="Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds once we're back on CUDA. The Community Forensics mother of all detection models is now available for inference, head to the middle tab above this. Lots of exciting things coming up, stay tuned!",
-    api_name="predict"
 )
 community_forensics_preview = gr.Interface(

             "Label": f"Error: {str(e)}"
         }
+# --- Streaming Ensemble Prediction ---
+def ensemble_prediction_stream(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
+    # Setup (same as before)
     if not isinstance(img, Image.Image):
         try:
             img = Image.fromarray(img)
     health_agent = SystemHealthAgent()
     context_agent = ContextualIntelligenceAgent()
     anomaly_agent = ForensicAnomalyDetectionAgent()
     health_agent.monitor_system_health()
     if augment_methods:
         img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
     else:
         img_pil = img
+    img_np_og = np.array(img)
     model_predictions_raw = {}
     confidence_scores = {}
     results = []
+    table_rows = []
+    # Stream results as each model finishes
     for model_id in MODEL_REGISTRY:
         model_start = time.time()
         result = infer(img_pil, model_id, confidence_threshold)
         model_end = time.time()
         monitor_agent.monitor_prediction(
             model_id,
             result["Label"],
             max(result.get("AI Score", 0.0), result.get("Real Score", 0.0)),
             model_end - model_start
         )
         model_predictions_raw[model_id] = result
         confidence_scores[model_id] = max(result.get("AI Score", 0.0), result.get("Real Score", 0.0))
         results.append(result)
+        table_rows.append([
+            result.get("Model", ""),
+            result.get("Contributor", ""),
+            round(result.get("AI Score", 0.0), 3) if result.get("AI Score") is not None else 0.0,
+            round(result.get("Real Score", 0.0), 3) if result.get("Real Score") is not None else 0.0,
+            result.get("Label", "Error")
+        ])
+        # Yield partial results: only update the table, others are None
+        yield None, None, table_rows, None, None
+    # After all models, compute the rest as before
     image_data_for_context = {
         "width": img.width,
         "height": img.height,
     }
     detected_context_tags = context_agent.infer_context_tags(image_data_for_context, model_predictions_raw)
     logger.info(f"Detected context tags: {detected_context_tags}")
     adjusted_weights = weight_manager.adjust_weights(model_predictions_raw, confidence_scores, context_tags=detected_context_tags)
+    weighted_predictions = {"AI": 0.0, "REAL": 0.0, "UNCERTAIN": 0.0}
     for model_id, prediction in model_predictions_raw.items():
         prediction_label = prediction.get("Label")
         if prediction_label in weighted_predictions:
             weighted_predictions[prediction_label] += adjusted_weights[model_id]
         else:
             logger.warning(f"Unexpected prediction label '{prediction_label}' from model '{model_id}'. Skipping its weight in consensus.")
     final_prediction_label = "UNCERTAIN"
     if weighted_predictions["AI"] > weighted_predictions["REAL"] and weighted_predictions["AI"] > weighted_predictions["UNCERTAIN"]:
         final_prediction_label = "AI"
     elif weighted_predictions["REAL"] > weighted_predictions["AI"] and weighted_predictions["REAL"] > weighted_predictions["UNCERTAIN"]:
         final_prediction_label = "REAL"
     optimization_agent.analyze_performance(final_prediction_label, None)
     gradient_image = gradient_processing(img_np_og)
     gradient_image2 = gradient_processing(img_np_og, intensity=45, equalize=True)
     minmax_image = minmax_process(img_np_og)
     minmax_image2 = minmax_process(img_np_og, radius=6)
     bitplane_image = bit_plane_extractor(img_pil)
     ela1 = ELA(img_np_og, quality=75, scale=50, contrast=20, linear=False, grayscale=True)
     ela2 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=True)
     ela3 = ELA(img_np_og, quality=75, scale=75, contrast=25, linear=False, grayscale=False)
     forensics_images = [img_pil, ela1, ela2, ela3, gradient_image, gradient_image2, minmax_image, minmax_image2, bitplane_image]
     forensic_output_descriptions = [
         f"Original augmented image (PIL): {img_pil.width}x{img_pil.height}",
         "ELA analysis (Pass 1): Grayscale error map, quality 75.",
     ]
     anomaly_detection_results = anomaly_agent.analyze_forensic_outputs(forensic_output_descriptions)
     logger.info(f"Forensic anomaly detection: {anomaly_detection_results['summary']}")
     consensus_html = f"<b><span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b>"
     inference_params = {
         "confidence_threshold": confidence_threshold,
         "augment_methods": augment_methods,
         "sharpen_strength": sharpen_strength,
         "detected_context_tags": detected_context_tags
     }
     ensemble_output_data = {
         "final_prediction_label": final_prediction_label,
         "weighted_predictions": weighted_predictions,
         "adjusted_weights": adjusted_weights
     }
     agent_monitoring_data_log = {
         "ensemble_monitor": {
             "alerts": monitor_agent.alerts,
         },
         "forensic_anomaly_detection": anomaly_detection_results
     }
     log_inference_data(
         original_image=img,
         inference_params=inference_params,
         agent_monitoring_data=agent_monitoring_data_log,
         human_feedback=None
     )
     cleaned_forensics_images = []
     for f_img in forensics_images:
         if isinstance(f_img, Image.Image):
                 logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
         else:
             logger.warning(f"Unexpected type in forensic_images: {type(f_img)}. Skipping.")
     logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
     for i, res_dict in enumerate(results):
         for key in ["AI Score", "Real Score"]:
             value = res_dict.get(key)
             if isinstance(value, np.float32):
                 res_dict[key] = float(value)
                 logger.info(f"Converted {key} for result {i} from numpy.float32 to float.")
     json_results = json.dumps(results, cls=NumpyEncoder)
+    yield img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
 detection_model_eval_playground = gr.Interface(
+    fn=ensemble_prediction_stream,
     inputs=[
         gr.Image(label="Upload Image to Analyze", sources=['upload', 'webcam'], type='pil'),
         gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Confidence Threshold"),
     ],
     title="Open Source Detection Models Found on the Hub",
     description="Space will be upgraded shortly; inference on all 6 models should take about 1.2~ seconds once we're back on CUDA. The Community Forensics mother of all detection models is now available for inference, head to the middle tab above this. Lots of exciting things coming up, stay tuned!",
+    api_name="predict",
+    live=True  # Re-run automatically whenever an input changes; output streaming itself comes from fn being a generator
 )
 community_forensics_preview = gr.Interface(