File size: 4,633 Bytes
58f23d5
 
 
 
 
 
 
db6eb6a
0f427f3
58f23d5
 
 
5967d4d
db6eb6a
58f23d5
5967d4d
0f427f3
 
 
 
 
 
db6eb6a
 
d20c076
 
 
db6eb6a
 
 
 
 
58f23d5
 
db6eb6a
 
 
58f23d5
5967d4d
 
 
 
f69af75
58f23d5
5967d4d
 
58f23d5
5967d4d
 
 
58f23d5
 
 
 
 
 
 
 
 
 
5967d4d
58f23d5
5967d4d
 
db6eb6a
5967d4d
db6eb6a
d20c076
 
 
 
 
 
db6eb6a
5967d4d
db6eb6a
58f23d5
 
 
db6eb6a
5967d4d
 
 
db6eb6a
5967d4d
 
58f23d5
 
5967d4d
 
 
 
58f23d5
5967d4d
 
 
58f23d5
bc355a9
 
 
 
 
 
 
 
 
 
db6eb6a
 
58f23d5
 
5967d4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import base64
import json
import io
import datetime
from PIL import Image
import logging
from huggingface_hub import HfApi, CommitScheduler
import numpy as np

logger = logging.getLogger(__name__)

HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
LOCAL_LOG_DIR = "./hf_inference_logs"

# Custom JSON Encoder to handle numpy types
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that converts numpy scalars and arrays to native Python types.

    The stock encoder raises TypeError on any numpy value; the log entries
    written by this module may contain numpy floats, ints, bools, or arrays
    coming from model outputs.
    """

    def default(self, obj):
        # Cover all numpy scalar families, not just float32, so json.dump
        # never crashes on an int64 count or a float64 score.
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)

def _save_pil_image_to_file(image: Image.Image, directory: str, prefix: str) -> str:
    """Persist *image* as a PNG under *directory* and return the generated filename.

    The filename embeds a microsecond timestamp so successive saves within the
    same second do not collide.
    """
    if not isinstance(image, Image.Image):
        raise TypeError(f"Expected a PIL Image, but received type: {type(image)}")

    os.makedirs(directory, exist_ok=True)

    stamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
    filename = f"{prefix}_{stamp}.png"
    file_path = os.path.join(directory, filename)

    # Normalize non-RGB modes (RGBA, P, L, ...) before the PNG export.
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    rgb_image.save(file_path, format="PNG")

    logger.info(f"Saved image to: {file_path}")
    return filename

# The initialize_dataset function will change significantly or be removed/simplified
# as we are no longer appending to a datasets.Dataset object directly in memory
def initialize_dataset_repo():
    """Ensure the Hugging Face dataset repository exists and return an HfApi client.

    Uses ``create_repo(..., exist_ok=True)``, which is idempotent, instead of
    probing with ``repo_info`` and treating *any* exception (including auth or
    network errors) as "repository is missing".

    Returns:
        HfApi: an authenticated client for subsequent Hub operations.
    """
    api = HfApi(token=os.getenv("HF_TOKEN"))
    # `private=True` only takes effect on first creation; an existing repo's
    # visibility is left untouched, matching the previous behavior.
    api.create_repo(repo_id=HF_DATASET_NAME, repo_type="dataset", private=True, exist_ok=True)
    logger.info(f"Ensured Hugging Face dataset repository exists: {HF_DATASET_NAME}")
    return api # Return the API object for subsequent operations

# Singleton CommitScheduler: every CommitScheduler instance spawns a background
# thread that periodically pushes `folder_path` to the Hub, so exactly one
# should exist per process. The previous per-call construction leaked a thread
# per inference, and exiting the scheduler's context manager forced an
# immediate push, defeating the batching interval entirely.
_commit_scheduler = None

def _get_commit_scheduler():
    """Create (on first use) and return the shared CommitScheduler for LOCAL_LOG_DIR."""
    global _commit_scheduler
    if _commit_scheduler is None:
        _commit_scheduler = CommitScheduler(
            repo_id=HF_DATASET_NAME,
            repo_type="dataset",
            folder_path=LOCAL_LOG_DIR,
            path_in_repo="logs",
            token=os.getenv("HF_TOKEN"),
            every=10  # minutes between background pushes (NOT a file count)
        )
    return _commit_scheduler

def log_inference_data(
    original_image: Image.Image,
    inference_params: dict,
    model_predictions: list[dict],
    ensemble_output: dict,
    forensic_images: list[Image.Image],
    agent_monitoring_data: dict,
    human_feedback: dict = None
):
    """Logs a single inference event by uploading a JSON file to the Hugging Face dataset repository.

    Images are saved locally under LOCAL_LOG_DIR alongside a JSON record that
    references them by filename; the shared CommitScheduler pushes the folder
    to the Hub in the background. Failures are logged, never raised, so
    logging can't break the inference path.

    Args:
        original_image: the image that was analyzed (PIL Image).
        inference_params: request parameters as passed to the models.
        model_predictions: per-model prediction dicts.
        ensemble_output: combined ensemble result.
        forensic_images: forensic visualizations; entries may be None or
            numpy arrays (converted via Image.fromarray on a best-effort basis).
        agent_monitoring_data: agent telemetry to store verbatim.
        human_feedback: optional feedback dict; stored as {} when None.
    """
    try:
        api = initialize_dataset_repo()  # Ensure the target repo exists before any upload is scheduled.

        original_image_filename = _save_pil_image_to_file(original_image, LOCAL_LOG_DIR, "original")

        forensic_images_filenames = []
        for img_item in forensic_images:
            if img_item is None:
                continue
            if not isinstance(img_item, Image.Image):
                try:
                    img_item = Image.fromarray(img_item)
                except Exception as e:
                    logger.error(f"Error converting forensic image to PIL for saving: {e}")
                    continue
            forensic_images_filenames.append(_save_pil_image_to_file(img_item, LOCAL_LOG_DIR, "forensic"))

        new_entry = {
            "timestamp": datetime.datetime.now().isoformat(),
            "image": original_image_filename,
            "inference_request": inference_params,
            "model_predictions": model_predictions,
            "ensemble_output": ensemble_output,
            "forensic_outputs": forensic_images_filenames,
            "agent_monitoring_data": agent_monitoring_data,
            "human_feedback": human_feedback if human_feedback is not None else {}
        }

        # Define a unique path for the new log file within the local directory
        os.makedirs(LOCAL_LOG_DIR, exist_ok=True) # Ensure the local directory exists
        timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
        log_file_path = os.path.join(LOCAL_LOG_DIR, f"log_{timestamp_str}.json")

        scheduler = _get_commit_scheduler()
        # Write under the scheduler's lock so a background push can never
        # upload a partially-written JSON file.
        with scheduler.lock:
            with open(log_file_path, 'w', encoding='utf-8') as f:
                json.dump(new_entry, f, cls=NumpyEncoder, indent=2)

        logger.info(f"Inference data logged successfully to local file: {log_file_path}")

    except Exception as e:
        logger.error(f"Failed to log inference data to local file: {e}")