import base64
import datetime
import io
import logging

from PIL import Image
from datasets import Dataset, load_dataset

logger = logging.getLogger(__name__)

HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"  # TODO: Replace with your actual HF username and dataset name

def _pil_to_base64(image: Image.Image) -> str:
    """Converts a PIL Image to a base64-encoded JPEG string."""
    buffered = io.BytesIO()
    # JPEG has no alpha channel, so ensure the image is in RGB mode before saving
    if image.mode != "RGB":
        image = image.convert("RGB")
    image.save(buffered, format="JPEG", quality=85)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
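
# Illustrative inverse helper, not part of the original module: a minimal
# sketch showing how a base64 string logged by _pil_to_base64 can be decoded
# back into a PIL Image when reading entries out of the dataset.
def _base64_to_pil(image_b64: str) -> Image.Image:
    """Decodes a base64-encoded JPEG string back into a PIL Image."""
    return Image.open(io.BytesIO(base64.b64decode(image_b64)))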

def initialize_dataset() -> Dataset:
    """Initializes or loads the Hugging Face dataset."""
    try:
        # Try to load the existing dataset from the Hub
        dataset = load_dataset(HF_DATASET_NAME, split="train")
        logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    except Exception:
        # If the dataset does not exist yet, create a new one with an empty schema
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        dataset = Dataset.from_dict({
            "timestamp": [],
            "image": [],  # Base64 string for simplicity; could store a path or raw bytes instead
            "inference_request": [],
            "model_predictions": [],
            "ensemble_output": [],
            "forensic_outputs": [],  # List of base64-encoded image strings
            "agent_monitoring_data": [],
            "human_feedback": []
        })
    return dataset

def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict | None = None
):
    """Logs a single inference event to the Hugging Face dataset."""
    try:
        dataset = initialize_dataset()

        # Convert PIL Images to base64 strings for storage
        original_image_b64 = _pil_to_base64(original_image)
        forensic_images_b64 = [_pil_to_base64(img) for img in forensic_images if img is not None]

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {}
        }

        # Append the new entry.
        # Note: appending one row at a time is inefficient for large datasets or
        # frequent logging; for a production system, consider batched writes or a
        # more robust data pipeline.
        updated_dataset = dataset.add_item(new_entry)

        # Save to a local cache for now. save_to_disk never pushes to the Hub;
        # to publish, uncomment the push_to_hub lines below and make sure
        # HF_DATASET_NAME is set correctly and you are logged in.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")
        logger.info("Inference data logged successfully to local cache.")
        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")
    except Exception as e:
        logger.error(f"Failed to log inference data to Hugging Face dataset: {e}")