OpenSight-Deepfake-Detection-Models-Playground

Running

OpenSight-Deepfake-Detection-Models-Playground / utils / hf_logger.py

LPX

feat: enhance image handling by ensuring input is a PIL Image and updating forensic image logging

d20c076 6 days ago

4.37 kB

	import os
	import base64
	import json
	import io
	import datetime
	from PIL import Image
	import logging
	from datasets import Dataset, load_dataset

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# Dataset identifier passed to load_dataset() by initialize_dataset().
	HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0" # TODO: Replace with your actual HF username and dataset name

	def _pil_to_base64(image: Image.Image) -> str:
	    """Encode a PIL Image as a base64 string of its JPEG representation.

	    Args:
	        image: The PIL Image to encode. Images whose mode is not 'RGB' are
	            converted to 'RGB' first.

	    Returns:
	        The base64 text (decoded as UTF-8) of the image saved as JPEG with
	        quality 85.

	    Raises:
	        TypeError: If ``image`` is not a PIL Image instance.
	    """
	    # Reject anything that is not a PIL Image up front.
	    if not isinstance(image, Image.Image):
	        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

	    # The JPEG is always written from an RGB image.
	    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

	    jpeg_buffer = io.BytesIO()
	    rgb_image.save(jpeg_buffer, format="JPEG", quality=85)
	    jpeg_bytes = jpeg_buffer.getvalue()
	    return base64.b64encode(jpeg_bytes).decode('utf-8')

	def initialize_dataset():
	    """Load the Hugging Face dataset, or build an empty one if loading fails.

	    Tries to load the "train" split of ``HF_DATASET_NAME``. If loading raises
	    for any reason, the cause is logged and an empty in-memory dataset with
	    the logging columns is returned instead.

	    Returns:
	        The loaded dataset, or a new empty ``Dataset`` with the columns
	        written by ``log_inference_data``.
	    """
	    try:
	        # Keep the try body minimal: only the load itself may trigger the fallback.
	        dataset = load_dataset(HF_DATASET_NAME, split="train")
	    except Exception as e:
	        # Deliberately best-effort: any load failure falls back to an empty
	        # dataset. Record the cause so a missing dataset can be told apart
	        # from other failures when reading the logs.
	        logger.warning(f"Could not load Hugging Face dataset {HF_DATASET_NAME}: {e}")
	        logger.info(f"Creating new Hugging Face dataset: {HF_DATASET_NAME}")
	        dataset = Dataset.from_dict({
	            "timestamp": [],
	            "image": [],  # Storing base64 string for simplicity, or path/bytes if preferred
	            "inference_request": [],
	            "model_predictions": [],
	            "ensemble_output": [],
	            "forensic_outputs": [],  # List of base64 image strings
	            "agent_monitoring_data": [],
	            "human_feedback": [],
	        })
	    else:
	        logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
	    return dataset

	def _encode_forensic_images(forensic_images) -> list[str]:
	    """Return base64 JPEG strings for the usable entries of ``forensic_images``.

	    ``None`` (for the whole argument or for a single entry) is skipped.
	    Entries that are not PIL Images are passed through ``Image.fromarray``;
	    entries for which that conversion fails are logged and skipped.
	    """
	    if forensic_images is None:
	        return []

	    encoded = []
	    for img_item in forensic_images:
	        if img_item is None:
	            continue
	        if not isinstance(img_item, Image.Image):
	            try:
	                img_item = Image.fromarray(img_item)
	            except Exception as e:
	                logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
	                continue  # Skip this image if conversion fails

	        # img_item is a PIL Image here, safe to pass to _pil_to_base64
	        encoded.append(_pil_to_base64(img_item))
	    return encoded


	def log_inference_data(
	    original_image: Image.Image,
	    inference_params: dict,
	    model_predictions: list[dict],
	    ensemble_output: dict,
	    forensic_images: list[Image.Image],
	    agent_monitoring_data: dict,
	    human_feedback: dict = None
	):
	    """Logs a single inference event to the Hugging Face dataset.

	    The event is appended to the dataset returned by ``initialize_dataset()``
	    and the result is saved to the local directory
	    ``sherloq-forensics/hf_dataset_cache``. Nothing is pushed to the Hub.

	    Args:
	        original_image: The analysed image; must be a PIL Image.
	        inference_params: Stored under the "inference_request" column.
	        model_predictions: Stored under the "model_predictions" column.
	        ensemble_output: Stored under the "ensemble_output" column.
	        forensic_images: Images stored as base64 strings under
	            "forensic_outputs". ``None`` is treated as no images; ``None``
	            entries and entries that cannot be converted to PIL are skipped.
	        agent_monitoring_data: Stored under the "agent_monitoring_data" column.
	        human_feedback: Optional feedback; stored as ``{}`` when ``None``.

	    Exceptions raised while logging are caught and logged (with traceback)
	    rather than propagated to the caller.
	    """
	    try:
	        dataset = initialize_dataset()

	        # Convert PIL Images to base64 strings for storage
	        original_image_b64 = _pil_to_base64(original_image)
	        forensic_images_b64 = _encode_forensic_images(forensic_images)

	        new_entry = {
	            "timestamp": datetime.datetime.now().isoformat(),
	            "image": original_image_b64,
	            "inference_request": inference_params,
	            "model_predictions": model_predictions,
	            "ensemble_output": ensemble_output,
	            "forensic_outputs": forensic_images_b64,  # List of base64 image strings
	            "agent_monitoring_data": agent_monitoring_data,
	            "human_feedback": human_feedback if human_feedback is not None else {},
	        }

	        # Append the new entry
	        # Note: Directly appending might not be efficient for large datasets or frequent logging
	        # For a production system, consider batched writes or more robust data pipelines.
	        updated_dataset = dataset.add_item(new_entry)

	        # save_to_disk only writes a local copy; it does not push to the Hub.
	        # NOTE(review): the dataset is re-created by initialize_dataset() on
	        # every call and written to the same path, so entries from earlier
	        # calls are not accumulated by this function - confirm this is intended.
	        updated_dataset.save_to_disk("sherloq-forensics/hf_dataset_cache")
	        logger.info("Inference data logged successfully to local cache.")

	        # To push to hub, uncomment the line below and ensure HF_DATASET_NAME is set correctly and you are logged in
	        # updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
	        # logger.info("Inference data pushed to Hugging Face Hub.")

	    except Exception as e:
	        # Logging is best-effort and must not break the caller; keep the
	        # traceback so failures can be diagnosed from the logs.
	        logger.exception(f"Failed to log inference data to Hugging Face dataset: {e}")