LPX
feat: enhance image handling by ensuring input is a PIL Image and updating forensic image logging
d20c076
raw
history blame
4.37 kB
import os
import base64
import json
import io
import datetime
from PIL import Image
import logging
from datasets import Dataset, load_dataset
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Dataset identifier passed to load_dataset() in initialize_dataset(); it is
# also referenced by the (commented-out) push_to_hub call in log_inference_data().
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name
def _pil_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 text string of its JPEG bytes.

    Images whose mode is not ``'RGB'`` are converted to RGB before being
    written as JPEG (quality 85) into an in-memory buffer.

    Args:
        image: The PIL image to encode.

    Returns:
        The base64 encoding of the JPEG bytes, decoded as UTF-8 text.

    Raises:
        TypeError: If ``image`` is not a ``PIL.Image.Image`` instance.
    """
    if isinstance(image, Image.Image):
        # The image is saved as JPEG, so normalise everything to RGB first.
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
        jpeg_buffer = io.BytesIO()
        rgb_image.save(jpeg_buffer, format="JPEG", quality=85)
        encoded_bytes = base64.b64encode(jpeg_buffer.getvalue())
        return encoded_bytes.decode('utf-8')
    raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")
def initialize_dataset():
    """Load the Hugging Face dataset, or build an empty one if loading fails.

    Calls ``load_dataset(HF_DATASET_NAME, split="train")``. Any exception
    raised by that call is treated as "no dataset available": the cause is
    logged as a warning and a new, empty ``Dataset`` with the logging columns
    is created instead, so this function stays best-effort.

    Returns:
        The loaded dataset, or an empty ``Dataset`` containing the columns
        ``timestamp``, ``image``, ``inference_request``, ``model_predictions``,
        ``ensemble_output``, ``forensic_outputs``, ``agent_monitoring_data``
        and ``human_feedback``.
    """
    try:
        # Try to load the existing dataset.
        dataset = load_dataset(HF_DATASET_NAME, split="train")
    except Exception as exc:
        # Keep the fallback, but record why loading failed so that network or
        # authentication errors are not mistaken for a missing dataset.
        logger.warning(f"Could not load Hugging Face dataset {HF_DATASET_NAME}: {exc}")
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        dataset = Dataset.from_dict({
            "timestamp": [],
            "image": [],  # Storing base64 string for simplicity, or path/bytes if preferred
            "inference_request": [],
            "model_predictions": [],
            "ensemble_output": [],
            "forensic_outputs": [],  # List of base64 image strings
            "agent_monitoring_data": [],
            "human_feedback": [],
        })
    else:
        logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    return dataset
def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Log a single inference event to the Hugging Face dataset.

    Builds one record from the arguments, appends it to the dataset returned
    by ``initialize_dataset()`` and saves the result to the local directory
    ``sherloq-forensics/hf_dataset_cache``. Any exception raised along the way
    is logged (with traceback) and swallowed, so this function never raises
    to the caller.

    Args:
        original_image: Input image; stored as a base64 JPEG string.
        inference_params: Stored unchanged under ``inference_request``.
        model_predictions: Stored unchanged under ``model_predictions``.
        ensemble_output: Stored unchanged under ``ensemble_output``.
        forensic_images: Images stored as a list of base64 JPEG strings.
            ``None`` is treated as an empty list. ``None`` entries are
            skipped; entries that are not PIL images are passed to
            ``Image.fromarray`` and skipped (with an error log) if that fails.
        agent_monitoring_data: Stored unchanged under ``agent_monitoring_data``.
        human_feedback: Optional feedback; stored as ``{}`` when ``None``.
    """
    try:
        dataset = initialize_dataset()

        # Convert PIL Images to base64 strings for storage.
        original_image_b64 = _pil_to_base64(original_image)

        forensic_images_b64 = []
        # Tolerate callers that pass None instead of an empty list.
        for img_item in (forensic_images if forensic_images is not None else []):
            if img_item is None:
                continue
            if not isinstance(img_item, Image.Image):
                try:
                    img_item = Image.fromarray(img_item)
                except Exception as e:
                    logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
                    continue  # Skip this image if conversion fails
            # img_item is now a PIL Image, safe to pass to _pil_to_base64.
            forensic_images_b64.append(_pil_to_base64(img_item))

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,  # List of base64 image strings
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {},
        }

        # Append the new entry.
        # Note: Directly appending might not be efficient for large datasets or frequent logging.
        # For a production system, consider batched writes or more robust data pipelines.
        updated_dataset = dataset.add_item(new_entry)

        # NOTE(review): the dataset saved here is whatever initialize_dataset()
        # returned plus this one entry; nothing visible in this file reads the
        # local cache back, so confirm whether entries are expected to
        # accumulate across calls.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")  # Save locally for now
        logger.info("Inference data logged successfully to local cache.")

        # To push to hub, uncomment the lines below and ensure HF_DATASET_NAME is set correctly and you are logged in
        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")
    except Exception as e:
        # logger.exception keeps the original message and adds the traceback.
        logger.exception(f"Failed to log inference data to Hugging Face dataset: {e}")