LPX
committed on
Commit
·
febce11
1
Parent(s):
679e047
feat: improve JSON handling and type safety in inference results and dataset logging
Browse files- app_mcp.py +32 -8
- utils/hf_logger.py +34 -21
app_mcp.py
CHANGED
@@ -27,6 +27,7 @@ from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDete
|
|
27 |
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
|
28 |
from agents.weight_management import ModelWeightManager
|
29 |
from dotenv import load_dotenv
|
|
|
30 |
|
31 |
# Configure logging
|
32 |
logging.basicConfig(level=logging.INFO)
|
@@ -236,8 +237,8 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
|
|
236 |
"Model": entry.display_name,
|
237 |
"Contributor": entry.contributor,
|
238 |
"HF Model Path": entry.model_path,
|
239 |
-
"AI Score":
|
240 |
-
"Real Score":
|
241 |
"Label": f"Error: {str(e)}"
|
242 |
}
|
243 |
|
@@ -386,11 +387,15 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
386 |
table_rows = [[
|
387 |
r.get("Model", ""),
|
388 |
r.get("Contributor", ""),
|
389 |
-
r.get("AI Score", ""),
|
390 |
-
r.get("Real Score", ""),
|
391 |
-
r.get("Label", "")
|
392 |
] for r in results]
|
393 |
|
|
|
|
|
|
|
|
|
394 |
# The get_consensus_label function is now replaced by final_prediction_label from weighted consensus
|
395 |
consensus_html = f"<b><span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b>"
|
396 |
|
@@ -441,7 +446,26 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
|
|
441 |
human_feedback=None # This can be populated later with human review data
|
442 |
)
|
443 |
|
444 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
445 |
|
446 |
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
|
447 |
with ms.Application() as app:
|
@@ -509,8 +533,8 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
|
|
509 |
gr.Markdown(QUICK_INTRO)
|
510 |
|
511 |
with gr.Tab("👑 Community Forensics Preview"):
|
512 |
-
temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
|
513 |
-
|
514 |
with gr.Tab("🥇 Leaderboard"):
|
515 |
gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
|
516 |
|
|
|
27 |
from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
|
28 |
from agents.weight_management import ModelWeightManager
|
29 |
from dotenv import load_dotenv
|
30 |
+
import json
|
31 |
|
32 |
# Configure logging
|
33 |
logging.basicConfig(level=logging.INFO)
|
|
|
237 |
"Model": entry.display_name,
|
238 |
"Contributor": entry.contributor,
|
239 |
"HF Model Path": entry.model_path,
|
240 |
+
"AI Score": 0.0, # Ensure it's a float even on error
|
241 |
+
"Real Score": 0.0, # Ensure it's a float even on error
|
242 |
"Label": f"Error: {str(e)}"
|
243 |
}
|
244 |
|
|
|
387 |
table_rows = [[
|
388 |
r.get("Model", ""),
|
389 |
r.get("Contributor", ""),
|
390 |
+
r.get("AI Score", 0.0) if r.get("AI Score") is not None else 0.0,
|
391 |
+
r.get("Real Score", 0.0) if r.get("Real Score") is not None else 0.0,
|
392 |
+
r.get("Label", "Error")
|
393 |
] for r in results]
|
394 |
|
395 |
+
logger.info(f"Type of table_rows: {type(table_rows)}")
|
396 |
+
for i, row in enumerate(table_rows):
|
397 |
+
logger.info(f"Row {i} types: {[type(item) for item in row]}")
|
398 |
+
|
399 |
# The get_consensus_label function is now replaced by final_prediction_label from weighted consensus
|
400 |
consensus_html = f"<b><span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b>"
|
401 |
|
|
|
446 |
human_feedback=None # This can be populated later with human review data
|
447 |
)
|
448 |
|
449 |
+
# Final type safety check for forensic_images before returning
|
450 |
+
cleaned_forensics_images = []
|
451 |
+
for f_img in forensics_images:
|
452 |
+
if isinstance(f_img, Image.Image):
|
453 |
+
cleaned_forensics_images.append(f_img)
|
454 |
+
elif isinstance(f_img, np.ndarray):
|
455 |
+
try:
|
456 |
+
cleaned_forensics_images.append(Image.fromarray(f_img))
|
457 |
+
except Exception as e:
|
458 |
+
logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
|
459 |
+
# Optionally, append a placeholder or skip
|
460 |
+
else:
|
461 |
+
logger.warning(f"Unexpected type in forensic_images: {type(f_img)}. Skipping.")
|
462 |
+
|
463 |
+
logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
|
464 |
+
|
465 |
+
# Return raw model results as JSON string for debug_json component
|
466 |
+
json_results = json.dumps(results)
|
467 |
+
|
468 |
+
return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
|
469 |
|
470 |
with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
|
471 |
with ms.Application() as app:
|
|
|
533 |
gr.Markdown(QUICK_INTRO)
|
534 |
|
535 |
with gr.Tab("👑 Community Forensics Preview"):
|
536 |
+
# temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
|
537 |
+
gr.Markdown("Community Forensics Preview coming soon!") # Placeholder for now
|
538 |
with gr.Tab("🥇 Leaderboard"):
|
539 |
gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")
|
540 |
|
utils/hf_logger.py
CHANGED
@@ -5,7 +5,8 @@ import io
|
|
5 |
import datetime
|
6 |
from PIL import Image
|
7 |
import logging
|
8 |
-
from datasets import Dataset, load_dataset
|
|
|
9 |
|
10 |
logger = logging.getLogger(__name__)
|
11 |
|
@@ -33,16 +34,29 @@ def initialize_dataset():
|
|
33 |
except Exception:
|
34 |
# If dataset does not exist, create a new one with an empty structure
|
35 |
logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
dataset = Dataset.from_dict({
|
37 |
"timestamp": [],
|
38 |
-
"image": [],
|
39 |
"inference_request": [],
|
40 |
"model_predictions": [],
|
41 |
"ensemble_output": [],
|
42 |
-
"forensic_outputs": [],
|
43 |
"agent_monitoring_data": [],
|
44 |
"human_feedback": []
|
45 |
-
})
|
46 |
return dataset
|
47 |
|
48 |
def log_inference_data(
|
@@ -77,26 +91,25 @@ def log_inference_data(
|
|
77 |
new_entry = {
|
78 |
"timestamp": datetime.datetime.now().isoformat(),
|
79 |
"image": original_image_b64,
|
80 |
-
"inference_request": inference_params,
|
81 |
-
"model_predictions": model_predictions,
|
82 |
-
"ensemble_output": ensemble_output,
|
83 |
-
"forensic_outputs": forensic_images_b64, #
|
84 |
-
"agent_monitoring_data": agent_monitoring_data,
|
85 |
-
"human_feedback": human_feedback if human_feedback is not None else {}
|
86 |
}
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
|
|
95 |
|
96 |
-
#
|
97 |
-
|
98 |
-
# For a production system, consider batched writes or more robust data pipelines.
|
99 |
-
updated_dataset = dataset.add_item(new_entry)
|
100 |
|
101 |
# This will push to the Hugging Face Hub if you are logged in and dataset is configured
|
102 |
# Or save locally if not.
|
|
|
5 |
import datetime
|
6 |
from PIL import Image
|
7 |
import logging
|
8 |
+
from datasets import Dataset, load_dataset, Features, Value, Sequence
|
9 |
+
import copy
|
10 |
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
34 |
except Exception:
|
35 |
# If dataset does not exist, create a new one with an empty structure
|
36 |
logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
|
37 |
+
|
38 |
+
# Define the features explicitly
|
39 |
+
features = Features({
|
40 |
+
"timestamp": Value('string'),
|
41 |
+
"image": Value('string'), # base64 string
|
42 |
+
"inference_request": Value('string'), # JSON string
|
43 |
+
"model_predictions": Value('string'), # JSON string
|
44 |
+
"ensemble_output": Value('string'), # JSON string
|
45 |
+
"forensic_outputs": Sequence(Value('string')), # List of base64 image strings
|
46 |
+
"agent_monitoring_data": Value('string'), # JSON string
|
47 |
+
"human_feedback": Value('string') # JSON string
|
48 |
+
})
|
49 |
+
|
50 |
dataset = Dataset.from_dict({
|
51 |
"timestamp": [],
|
52 |
+
"image": [],
|
53 |
"inference_request": [],
|
54 |
"model_predictions": [],
|
55 |
"ensemble_output": [],
|
56 |
+
"forensic_outputs": [],
|
57 |
"agent_monitoring_data": [],
|
58 |
"human_feedback": []
|
59 |
+
}, features=features) # Pass the features explicitly
|
60 |
return dataset
|
61 |
|
62 |
def log_inference_data(
|
|
|
91 |
new_entry = {
|
92 |
"timestamp": datetime.datetime.now().isoformat(),
|
93 |
"image": original_image_b64,
|
94 |
+
"inference_request": json.dumps(inference_params),
|
95 |
+
"model_predictions": json.dumps(model_predictions),
|
96 |
+
"ensemble_output": json.dumps(ensemble_output),
|
97 |
+
"forensic_outputs": forensic_images_b64, # This is already a list of strings
|
98 |
+
"agent_monitoring_data": json.dumps(agent_monitoring_data),
|
99 |
+
"human_feedback": json.dumps(human_feedback if human_feedback is not None else {})
|
100 |
}
|
101 |
|
102 |
+
# Get current dataset features
|
103 |
+
features = dataset.features
|
104 |
+
|
105 |
+
# Convert existing dataset to a list of dictionaries
|
106 |
+
dataset_list = dataset.to_list()
|
107 |
+
|
108 |
+
# Append the new entry to the list
|
109 |
+
dataset_list.append(new_entry)
|
110 |
|
111 |
+
# Create a new dataset from the updated list
|
112 |
+
updated_dataset = Dataset.from_list(dataset_list, features=features)
|
|
|
|
|
113 |
|
114 |
# This will push to the Hugging Face Hub if you are logged in and dataset is configured
|
115 |
# Or save locally if not.
|