LPX committed on
Commit febce11 · 1 Parent(s): 679e047

feat: improve JSON handling and type safety in inference results and dataset logging

Files changed (2)
  1. app_mcp.py +32 -8
  2. utils/hf_logger.py +34 -21
app_mcp.py CHANGED

@@ -27,6 +27,7 @@ from agents.smart_agents import ContextualIntelligenceAgent, ForensicAnomalyDete
 from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
 from agents.weight_management import ModelWeightManager
 from dotenv import load_dotenv
+import json
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)

@@ -236,8 +237,8 @@ def infer(image: Image.Image, model_id: str, confidence_threshold: float = 0.75)
     "Model": entry.display_name,
     "Contributor": entry.contributor,
     "HF Model Path": entry.model_path,
-    "AI Score": None,
-    "Real Score": None,
+    "AI Score": 0.0,  # Ensure it's a float even on error
+    "Real Score": 0.0,  # Ensure it's a float even on error
     "Label": f"Error: {str(e)}"
 }
 
@@ -386,11 +387,15 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
 table_rows = [[
     r.get("Model", ""),
     r.get("Contributor", ""),
-    r.get("AI Score", ""),
-    r.get("Real Score", ""),
-    r.get("Label", "")
+    r.get("AI Score", 0.0) if r.get("AI Score") is not None else 0.0,
+    r.get("Real Score", 0.0) if r.get("Real Score") is not None else 0.0,
+    r.get("Label", "Error")
 ] for r in results]
 
+logger.info(f"Type of table_rows: {type(table_rows)}")
+for i, row in enumerate(table_rows):
+    logger.info(f"Row {i} types: {[type(item) for item in row]}")
+
 # The get_consensus_label function is now replaced by final_prediction_label from weighted consensus
 consensus_html = f"<b><span style='color:{'red' if final_prediction_label == 'AI' else ('green' if final_prediction_label == 'REAL' else 'orange')}'>{final_prediction_label}</span></b>"
 
@@ -441,7 +446,26 @@ def predict_image_with_json(img, confidence_threshold, augment_methods, rotate_d
     human_feedback=None # This can be populated later with human review data
 )
 
-return img_pil, forensics_images, table_rows, results, consensus_html
+# Final type safety check for forensic_images before returning
+cleaned_forensics_images = []
+for f_img in forensics_images:
+    if isinstance(f_img, Image.Image):
+        cleaned_forensics_images.append(f_img)
+    elif isinstance(f_img, np.ndarray):
+        try:
+            cleaned_forensics_images.append(Image.fromarray(f_img))
+        except Exception as e:
+            logger.warning(f"Could not convert numpy array to PIL Image for gallery: {e}")
+            # Optionally, append a placeholder or skip
+    else:
+        logger.warning(f"Unexpected type in forensic_images: {type(f_img)}. Skipping.")
+
+logger.info(f"Cleaned forensic images types: {[type(img) for img in cleaned_forensics_images]}")
+
+# Return raw model results as JSON string for debug_json component
+json_results = json.dumps(results)
+
+return img_pil, cleaned_forensics_images, table_rows, json_results, consensus_html
 
 with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ overflow-y: hidden !important;} .ms-gr-ant-welcome-icon{ height:unset !important;} .tabs{margin-top:10px;}") as demo:
     with ms.Application() as app:

@@ -509,8 +533,8 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
 gr.Markdown(QUICK_INTRO)
 
 with gr.Tab("👑 Community Forensics Preview"):
-    temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
-    # preview # no idea if this will work
+    # temp_space = gr.load("aiwithoutborders-xyz/OpenSight-Community-Forensics-Preview", src="spaces")
+    gr.Markdown("Community Forensics Preview coming soon!") # Placeholder for now
 with gr.Tab("🥇 Leaderboard"):
     gr.Markdown("# AI Generated / Deepfake Detection Models Leaderboard: Soon™")

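The reworked return path above keeps every score cell in the results table a float and hands the raw per-model results to the UI as a pre-serialized JSON string. A minimal standalone sketch of that coercion and round-trip, using an illustrative results list rather than real model output (names and values here are examples only):

import json

# Illustrative per-model results; one entry still carries None scores,
# which the coercion below defaults to 0.0
results = [
    {"Model": "model-a", "Contributor": "alice", "AI Score": 0.91, "Real Score": 0.09, "Label": "AI"},
    {"Model": "model-b", "Contributor": "bob", "AI Score": None, "Real Score": None, "Label": "Error: timeout"},
]

# Same pattern as predict_image_with_json: score columns stay numeric
table_rows = [[
    r.get("Model", ""),
    r.get("Contributor", ""),
    r.get("AI Score", 0.0) if r.get("AI Score") is not None else 0.0,
    r.get("Real Score", 0.0) if r.get("Real Score") is not None else 0.0,
    r.get("Label", "Error"),
] for r in results]

# Raw results serialized for the JSON debug view; json.loads confirms the round-trip
json_results = json.dumps(results)
assert json.loads(json_results) == results

Serializing once with json.dumps also normalizes non-JSON values (None becomes null), so the string can be displayed or logged verbatim without further type checks.
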
utils/hf_logger.py CHANGED

@@ -5,7 +5,8 @@ import io
 import datetime
 from PIL import Image
 import logging
-from datasets import Dataset, load_dataset
+from datasets import Dataset, load_dataset, Features, Value, Sequence
+import copy
 
 logger = logging.getLogger(__name__)
 
@@ -33,16 +34,29 @@ def initialize_dataset():
 except Exception:
     # If dataset does not exist, create a new one with an empty structure
     logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
+
+    # Define the features explicitly
+    features = Features({
+        "timestamp": Value('string'),
+        "image": Value('string'), # base64 string
+        "inference_request": Value('string'), # JSON string
+        "model_predictions": Value('string'), # JSON string
+        "ensemble_output": Value('string'), # JSON string
+        "forensic_outputs": Sequence(Value('string')), # List of base64 image strings
+        "agent_monitoring_data": Value('string'), # JSON string
+        "human_feedback": Value('string') # JSON string
+    })
+
     dataset = Dataset.from_dict({
         "timestamp": [],
-        "image": [], # Storing base64 string for simplicity, or path/bytes if preferred
+        "image": [],
         "inference_request": [],
         "model_predictions": [],
        "ensemble_output": [],
-        "forensic_outputs": [], # List of base64 image strings
+        "forensic_outputs": [],
         "agent_monitoring_data": [],
         "human_feedback": []
-    })
+    }, features=features) # Pass the features explicitly
 return dataset
 
 def log_inference_data(

@@ -77,26 +91,25 @@ def log_inference_data(
 new_entry = {
     "timestamp": datetime.datetime.now().isoformat(),
     "image": original_image_b64,
-    "inference_request": inference_params,
-    "model_predictions": model_predictions,
-    "ensemble_output": ensemble_output,
-    "forensic_outputs": forensic_images_b64, # List of base64 image strings
-    "agent_monitoring_data": agent_monitoring_data,
-    "human_feedback": human_feedback if human_feedback is not None else {}
+    "inference_request": json.dumps(inference_params),
+    "model_predictions": json.dumps(model_predictions),
+    "ensemble_output": json.dumps(ensemble_output),
+    "forensic_outputs": forensic_images_b64, # This is already a list of strings
+    "agent_monitoring_data": json.dumps(agent_monitoring_data),
+    "human_feedback": json.dumps(human_feedback if human_feedback is not None else {})
 }
 
-logger.info(f"Type of original_image_b64: {type(original_image_b64)}")
-logger.info(f"Type of inference_params: {type(inference_params)}")
-logger.info(f"Type of model_predictions: {type(model_predictions)}")
-logger.info(f"Type of ensemble_output: {type(ensemble_output)}")
-logger.info(f"Type of forensic_images_b64: {type(forensic_images_b64)}")
-logger.info(f"Type of agent_monitoring_data: {type(agent_monitoring_data)}")
-logger.info(f"Type of human_feedback: {type(human_feedback)}")
-
-# Append the new entry
-# Note: Directly appending might not be efficient for large datasets or frequent logging.
-# For a production system, consider batched writes or more robust data pipelines.
-updated_dataset = dataset.add_item(new_entry)
+# Get current dataset features
+features = dataset.features
+
+# Convert existing dataset to a list of dictionaries
+dataset_list = dataset.to_list()
+
+# Append the new entry to the list
+dataset_list.append(new_entry)
+
+# Create a new dataset from the updated list
+updated_dataset = Dataset.from_list(dataset_list, features=features)
 
 # This will push to the Hugging Face Hub if you are logged in and dataset is configured
 # Or save locally if not.
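
Because every nested field is now stored as a JSON string under an explicit Features schema, whatever reads the dataset back has to json.loads those columns. A minimal sketch of the resulting write/read round-trip, assuming the column names from this diff; the record values are placeholders and the Hub push is left out:

import json
from datasets import Dataset, Features, Sequence, Value

# Same explicit schema that initialize_dataset() defines
features = Features({
    "timestamp": Value("string"),
    "image": Value("string"),                       # base64-encoded original image
    "inference_request": Value("string"),           # JSON string
    "model_predictions": Value("string"),           # JSON string
    "ensemble_output": Value("string"),             # JSON string
    "forensic_outputs": Sequence(Value("string")),  # list of base64 image strings
    "agent_monitoring_data": Value("string"),       # JSON string
    "human_feedback": Value("string"),              # JSON string
})

# Writing: nested structures are serialized before they reach the dataset
entry = {
    "timestamp": "2024-01-01T00:00:00",
    "image": "aGVsbG8=",  # placeholder base64 payload
    "inference_request": json.dumps({"confidence_threshold": 0.75}),
    "model_predictions": json.dumps([{"Model": "model-a", "AI Score": 0.91, "Real Score": 0.09}]),
    "ensemble_output": json.dumps({"label": "AI"}),
    "forensic_outputs": ["aGVsbG8="],
    "agent_monitoring_data": json.dumps({}),
    "human_feedback": json.dumps({}),
}
dataset = Dataset.from_list([entry], features=features)

# Reading: decode the JSON-string columns back into Python objects
row = dataset[0]
predictions = json.loads(row["model_predictions"])
print(predictions[0]["AI Score"])  # 0.91

Declaring the schema up front also gives the initially empty columns created by Dataset.from_dict concrete string types, so the later Dataset.from_list(dataset_list, features=features) rebuild appends new rows without a type mismatch.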