|
import os |
|
import base64 |
|
import json |
|
import io |
|
import datetime |
|
from PIL import Image |
|
import logging |
|
from datasets import Dataset, load_dataset |
|
|
|
# Name handed to `load_dataset` to locate the Hugging Face dataset used for logging.
HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
def _pil_to_base64(image: Image.Image) -> str:
    """Encode a PIL image as a base64 string of its JPEG bytes.

    The image is converted to RGB when it is in any other mode, then written
    as JPEG (quality 85) into an in-memory buffer whose contents are
    base64-encoded.

    Args:
        image: The PIL image to encode.

    Returns:
        The base64 encoding of the JPEG bytes, decoded as UTF-8 text.

    Raises:
        TypeError: If ``image`` is not a ``PIL.Image.Image`` instance.
    """
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

    # The JPEG is always produced from an RGB image; other modes are converted.
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

    with io.BytesIO() as jpeg_buffer:
        rgb_image.save(jpeg_buffer, format="JPEG", quality=85)
        jpeg_bytes = jpeg_buffer.getvalue()

    return base64.b64encode(jpeg_bytes).decode('utf-8')
|
|
|
def initialize_dataset() -> "Dataset":
    """Load the Hugging Face dataset, or build an empty one if loading fails.

    Tries to load the ``train`` split of ``HF_DATASET_NAME``. If loading
    raises for any reason, the cause is logged and an empty in-memory dataset
    with the logging columns is returned instead.

    Returns:
        The loaded dataset, or a new empty dataset with the columns
        ``timestamp``, ``image``, ``inference_request``, ``model_predictions``,
        ``ensemble_output``, ``forensic_outputs``, ``agent_monitoring_data``
        and ``human_feedback``.
    """
    try:
        dataset = load_dataset(HF_DATASET_NAME, split="train")
    except Exception as exc:
        # Best-effort fallback is kept, but the reason is recorded so that a
        # network/auth failure is distinguishable from a missing dataset.
        logger.warning(
            "Could not load Hugging Face dataset %s (%s: %s); "
            "falling back to a new empty dataset.",
            HF_DATASET_NAME,
            type(exc).__name__,
            exc,
        )
        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
        columns = (
            "timestamp",
            "image",
            "inference_request",
            "model_predictions",
            "ensemble_output",
            "forensic_outputs",
            "agent_monitoring_data",
            "human_feedback",
        )
        return Dataset.from_dict({column: [] for column in columns})

    logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
    return dataset
|
|
|
def _encode_forensic_images(forensic_images) -> list[str]:
    """Base64-encode forensic images, skipping entries that cannot be used.

    ``None`` entries are skipped. Entries that are not PIL images are passed
    through ``Image.fromarray``; if that conversion raises, the error is
    logged and the entry is skipped.

    Args:
        forensic_images: Iterable of PIL images, objects accepted by
            ``Image.fromarray``, or ``None`` entries. May itself be ``None``,
            which is treated as "no forensic images".

    Returns:
        Base64 strings, in input order, for the entries that were encoded.
    """
    if forensic_images is None:
        return []

    encoded = []
    for item in forensic_images:
        if item is None:
            continue
        if not isinstance(item, Image.Image):
            try:
                item = Image.fromarray(item)
            except Exception as exc:
                logger.error(
                    "Error converting forensic image to PIL for base64 encoding: %s",
                    exc,
                )
                continue
        encoded.append(_pil_to_base64(item))
    return encoded


def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: "dict | None" = None
) -> None:
    """Log a single inference event to the Hugging Face dataset.

    Builds one record from the arguments, appends it to the dataset returned
    by ``initialize_dataset()`` and saves the result to the local directory
    ``sherloq-forensics/hf_dataset_cache``. Logging is best-effort: any
    exception is logged (with traceback) and not re-raised.

    Args:
        original_image: The analysed image; stored as a base64 JPEG string.
        inference_params: Stored under ``inference_request``.
        model_predictions: Stored under ``model_predictions``.
        ensemble_output: Stored under ``ensemble_output``.
        forensic_images: Forensic images; stored as a list of base64 JPEG
            strings under ``forensic_outputs``. ``None`` entries and entries
            that cannot be converted to PIL images are skipped.
        agent_monitoring_data: Stored under ``agent_monitoring_data``.
        human_feedback: Optional feedback; an empty dict is stored when it is
            ``None``.
    """
    try:
        dataset = initialize_dataset()

        original_image_b64 = _pil_to_base64(original_image)
        forensic_images_b64 = _encode_forensic_images(forensic_images)

        new_entry = {
            # Naive local-time ISO-8601 string.
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_b64,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_b64,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {},
        }

        # Type diagnostics for the values about to be added to the dataset.
        diagnostics = (
            ("original_image_b64", original_image_b64),
            ("inference_params", inference_params),
            ("model_predictions", model_predictions),
            ("ensemble_output", ensemble_output),
            ("forensic_images_b64", forensic_images_b64),
            ("agent_monitoring_data", agent_monitoring_data),
            ("human_feedback", human_feedback),
        )
        for label, value in diagnostics:
            logger.info("Type of %s: %s", label, type(value))

        updated_dataset = dataset.add_item(new_entry)

        # NOTE(review): the dataset is re-loaded through initialize_dataset()
        # on every call and then saved to this same path, so the on-disk cache
        # appears to contain only the loaded dataset plus this one entry;
        # earlier locally saved entries are not read back. Confirm intent.
        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")
        logger.info("Inference data logged successfully to local cache.")
    except Exception as exc:
        # Best-effort: never let logging break the caller, but keep the
        # traceback so failures can be diagnosed.
        logger.exception(
            "Failed to log inference data to Hugging Face dataset: %s", exc
        )