LPX
feat: enhance image handling by ensuring input is a PIL Image and updating forensic image logging
d20c076
import os | |
import base64 | |
import json | |
import io | |
import datetime | |
from PIL import Image | |
import logging | |
from datasets import Dataset, load_dataset | |
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)

# Hugging Face Hub repository id ("<namespace>/<dataset>") that
# initialize_dataset() attempts to load.
# TODO: Replace with your actual HF username and dataset name
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
def _pil_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 string of its JPEG bytes.

    Args:
        image: The image to encode. Images whose mode is not ``RGB`` are
            converted to ``RGB`` before being saved.

    Returns:
        The base64 encoding (as a UTF-8 ``str``) of the image saved as JPEG
        with ``quality=85``.

    Raises:
        TypeError: If ``image`` is not a ``PIL.Image.Image`` instance.
    """
    # Explicitly check the input so callers get a clear message instead of an
    # AttributeError from the ``.mode`` / ``.save`` accesses below.
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

    # Ensure image is in RGB mode before saving as JPEG
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    # The buffer is only needed until its bytes are base64-encoded.
    with io.BytesIO() as buffered:
        rgb_image.save(buffered, format="JPEG", quality=85)
        return base64.b64encode(buffered.getvalue()).decode('utf-8')
def initialize_dataset():
    """Load the Hugging Face dataset, or build an empty one if loading fails.

    Calls ``load_dataset(HF_DATASET_NAME, split="train")``. If that raises for
    any reason, the cause is logged and an empty ``Dataset`` with the logging
    columns is returned instead, so the caller always receives a dataset.

    Returns:
        The loaded ``train`` split, or an empty ``Dataset`` with the columns
        ``timestamp``, ``image``, ``inference_request``, ``model_predictions``,
        ``ensemble_output``, ``forensic_outputs``, ``agent_monitoring_data``
        and ``human_feedback``.
    """
    try:
        # Try to load existing dataset
        dataset = load_dataset(HF_DATASET_NAME, split="train")
    except Exception as exc:
        # Falling back to an empty dataset is deliberate best-effort, but the
        # reason is recorded: a network or authentication failure would
        # otherwise be indistinguishable from "dataset does not exist yet".
        logger.warning(f"Could not load Hugging Face dataset {HF_DATASET_NAME}: {exc}")
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        return Dataset.from_dict({
            "timestamp": [],
            "image": [],  # Storing base64 string for simplicity, or path/bytes if preferred
            "inference_request": [],
            "model_predictions": [],
            "ensemble_output": [],
            "forensic_outputs": [],  # List of base64 image strings
            "agent_monitoring_data": [],
            "human_feedback": [],
        })

    logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    return dataset
def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Log a single inference event to the dataset and save it to local disk.

    The dataset is obtained from ``initialize_dataset()``, one row is appended
    with ``Dataset.add_item`` and the result is written to
    ``sherloq-forensics/hf_dataset_cache`` with ``save_to_disk``. Any exception
    raised along the way is logged and swallowed, so logging never propagates
    an error to the caller.

    Args:
        original_image: The analysed image; stored as a base64 JPEG string.
        inference_params: Stored as-is under ``inference_request``.
        model_predictions: Stored as-is under ``model_predictions``.
        ensemble_output: Stored as-is under ``ensemble_output``.
        forensic_images: Images stored as base64 JPEG strings under
            ``forensic_outputs``. ``None`` (for the list or for an item) is
            skipped. Non-PIL items are passed through ``Image.fromarray``;
            items that fail that conversion are logged and skipped.
        agent_monitoring_data: Stored as-is under ``agent_monitoring_data``.
        human_feedback: Optional feedback; stored as ``{}`` when ``None``.

    Returns:
        None.

    NOTE(review): the dataset is rebuilt through ``initialize_dataset()`` on
    every call and the local cache written here is never read back, so the
    saved cache appears to contain only the loaded dataset plus this one
    entry. Confirm whether entries logged by earlier calls are meant to
    survive.
    """
    try:
        dataset = initialize_dataset()

        # Convert PIL Images to base64 strings for storage
        original_image_b64 = _pil_to_base64(original_image)

        # A missing list is treated as "no forensic images" rather than
        # aborting the whole log entry with a TypeError on iteration.
        if forensic_images is None:
            forensic_images = []

        forensic_images_b64 = []
        for img_item in forensic_images:
            if img_item is None:
                continue
            if not isinstance(img_item, Image.Image):
                try:
                    img_item = Image.fromarray(img_item)
                except Exception as e:
                    logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
                    continue  # Skip this image if conversion fails
            # Now img_item is a PIL Image, safe to pass to _pil_to_base64
            forensic_images_b64.append(_pil_to_base64(img_item))

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,  # List of base64 image strings
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {},
        }

        # Append the new entry
        # Note: Directly appending might not be efficient for large datasets or frequent logging
        # For a production system, consider batched writes or more robust data pipelines.
        updated_dataset = dataset.add_item(new_entry)

        # Only a local copy is written here; nothing is pushed to the Hub.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")  # Save locally for now
        logger.info("Inference data logged successfully to local cache.")

        # To push to hub, uncomment the line below and ensure HF_DATASET_NAME is set correctly and you are logged in
        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
        # logger.info("Inference data pushed to Hugging Face Hub.")
    except Exception as e:
        # Logging is best-effort: record the failure with its traceback and
        # return normally instead of breaking the inference path.
        logger.exception(f"Failed to log inference data to Hugging Face dataset: {e}")