LPX committed
Commit 5967d4d · Parent(s): 87fc2bb

feat: implement CommitScheduler for automated logging to Hugging Face dataset and refactor dataset initialization process

Files changed:
- app_mcp.py (+14, -2)
- utils/hf_logger.py (+34, -68)
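For orientation before the diffs: the commit drops the in-memory `datasets.Dataset` rebuild and the per-inference `push_to_hub` call in favor of the `huggingface_hub` `CommitScheduler` pattern, where each inference is written as a small JSON file into a watched local folder and a background scheduler commits that folder to the dataset repo on a fixed interval. Below is a minimal, self-contained sketch of that pattern, not the app's code: the repo id and folder name are placeholders, and running it requires an authenticated Hugging Face token.

```python
# Sketch of the CommitScheduler logging pattern (placeholder repo id / folder).
import json
import os
import uuid

from huggingface_hub import CommitScheduler

LOG_DIR = "./example_inference_logs"   # local folder the scheduler watches
REPO_ID = "your-org/your-log-dataset"  # placeholder dataset repo id

scheduler = CommitScheduler(
    repo_id=REPO_ID,
    repo_type="dataset",
    folder_path=LOG_DIR,
    every=5,        # push pending files every 5 minutes
    private=True,
)

def write_log(entry: dict) -> None:
    """Write one log entry as its own JSON file; the scheduler uploads it later."""
    os.makedirs(LOG_DIR, exist_ok=True)
    path = os.path.join(LOG_DIR, f"log_{uuid.uuid4().hex}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(entry, f)

write_log({"event": "example", "ok": True})
```

Writing one file per event keeps each commit append-only and avoids rebuilding and re-pushing the whole dataset on every inference.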
app_mcp.py CHANGED

@@ -28,13 +28,15 @@ from forensics.registry import register_model, MODEL_REGISTRY, ModelEntry
 from agents.weight_management import ModelWeightManager
 from dotenv import load_dotenv
 import json
+from huggingface_hub import CommitScheduler
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 os.environ['HF_HUB_CACHE'] = './models'
 
-
+LOCAL_LOG_DIR = "./hf_inference_logs"
+HF_DATASET_NAME="aiwithoutborders-xyz/degentic_rd0"
 load_dotenv()
 # print(os.getenv("HF_HUB_CACHE"))
 
@@ -586,4 +588,14 @@ with gr.Blocks(css="#post-gallery { overflow: hidden !important;} .grid-wrap{ ov
 
 # --- MCP-Ready Launch ---
 if __name__ == "__main__":
-
+    # Initialize CommitScheduler
+    # The scheduler will monitor LOCAL_LOG_DIR and push changes to HF_DATASET_NAME
+    with CommitScheduler(
+        repo_id=HF_DATASET_NAME, # Your Hugging Face dataset repository ID
+        repo_type="dataset",
+        folder_path=LOCAL_LOG_DIR,
+        every=5, # Commit every 5 minutes
+        private=True, # Keep your dataset private
+        # token=os.getenv("HF_TOKEN") # Uncomment and set if token is not saved globally
+    ) as scheduler:
+        demo.launch(share=True, mcp_server=True)
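One caveat worth noting with this launch wiring: `log_inference_data` in utils/hf_logger.py (next file) writes files into LOCAL_LOG_DIR independently of the scheduler created here, so a scheduled push could in principle start while a file is still being written. The huggingface_hub documentation suggests holding the scheduler's `lock` around such writes; the sketch below is a hedged option, and passing the scheduler into the logger is an assumption, not something this commit does.

```python
# Hypothetical helper: guard log writes with the scheduler's lock so a file is
# never committed half-written. Not part of this commit; shown as an option.
import json
import os

from huggingface_hub import CommitScheduler

def write_log_entry(scheduler: CommitScheduler, folder: str, filename: str, entry: dict) -> None:
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, filename)
    with scheduler.lock:  # block the background commit while the file is written
        with open(path, "w", encoding="utf-8") as f:
            json.dump(entry, f, indent=2)
```

Relatedly, `log_inference_data` still calls `initialize_dataset_repo()` on every invocation even though the returned `api` object is no longer used for uploads; that round trip could be moved to startup.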
utils/hf_logger.py CHANGED

@@ -5,15 +5,15 @@ import io
 import datetime
 from PIL import Image
 import logging
-from
-import copy
+from huggingface_hub import HfApi, CommitOperationAdd # Keep HfApi for repo creation, but remove CommitOperationAdd for direct upload
 import numpy as np
 
 logger = logging.getLogger(__name__)
 
-HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
+HF_DATASET_NAME = "aiwithoutborders-xyz/degentic_rd0"
+LOCAL_LOG_DIR = "./hf_inference_logs" # Define a local directory to store logs
 
-# Custom JSON Encoder to handle numpy types
+# Custom JSON Encoder to handle numpy types
 class NumpyEncoder(json.JSONEncoder):
     def default(self, obj):
         if isinstance(obj, np.float32):
@@ -33,39 +33,18 @@ def _pil_to_base64(image: Image.Image) -> str:
     image.save(buffered, format="JPEG", quality=85)
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
 
-
-
+# The initialize_dataset function will change significantly or be removed/simplified
+# as we are no longer appending to a datasets.Dataset object directly in memory
+def initialize_dataset_repo():
+    """Initializes or ensures the Hugging Face dataset repository exists."""
+    api = HfApi()
     try:
-
-        dataset
-        logger.info(f"Loaded existing Hugging Face dataset: {HF_DATASET_NAME}")
+        api.repo_info(repo_id=HF_DATASET_NAME, repo_type="dataset")
+        logger.info(f"Hugging Face dataset repository already exists: {HF_DATASET_NAME}")
     except Exception:
-
-
-
-        # Define the features explicitly
-        features = Features({
-            "timestamp": Value('string'),
-            "image": Value('string'), # base64 string
-            "inference_request": Value('string'), # JSON string
-            "model_predictions": Value('string'), # JSON string
-            "ensemble_output": Value('string'), # JSON string
-            "forensic_outputs": Sequence(Value('string')), # List of base64 image strings
-            "agent_monitoring_data": Value('string'), # JSON string
-            "human_feedback": Value('string') # JSON string
-        })
-
-        dataset = Dataset.from_dict({
-            "timestamp": [],
-            "image": [],
-            "inference_request": [],
-            "model_predictions": [],
-            "ensemble_output": [],
-            "forensic_outputs": [],
-            "agent_monitoring_data": [],
-            "human_feedback": []
-        }, features=features) # Pass the features explicitly
-        return dataset
+        logger.info(f"Creating new Hugging Face dataset repository: {HF_DATASET_NAME}")
+        api.create_repo(repo_id=HF_DATASET_NAME, repo_type="dataset", private=True)
+    return api # Return the API object for subsequent operations
 
 def log_inference_data(
     original_image: Image.Image,
@@ -76,13 +55,12 @@ def log_inference_data(
     agent_monitoring_data: dict,
     human_feedback: dict = None
 ):
-    """Logs a single inference event to the Hugging Face dataset."""
+    """Logs a single inference event by uploading a JSON file to the Hugging Face dataset repository."""
     try:
-
-
-        # Convert PIL Images to base64 strings for storage
+        api = initialize_dataset_repo() # Get or create the repository
+
         original_image_b64 = _pil_to_base64(original_image)
-
+
         forensic_images_b64 = []
         for img_item in forensic_images:
             if img_item is not None:
@@ -91,42 +69,30 @@
                     img_item = Image.fromarray(img_item)
                 except Exception as e:
                     logger.error(f"Error converting forensic image to PIL for base64 encoding: {e}")
-                    continue
-
-                # Now img_item should be a PIL Image, safe to pass to _pil_to_base64
+                    continue
                 forensic_images_b64.append(_pil_to_base64(img_item))
 
         new_entry = {
             "timestamp": datetime.datetime.now().isoformat(),
             "image": original_image_b64,
-            "inference_request":
-            "model_predictions":
-            "ensemble_output":
-            "forensic_outputs": forensic_images_b64,
-            "agent_monitoring_data":
-            "human_feedback":
+            "inference_request": inference_params,
+            "model_predictions": model_predictions,
+            "ensemble_output": ensemble_output,
+            "forensic_outputs": forensic_images_b64,
+            "agent_monitoring_data": agent_monitoring_data,
+            "human_feedback": human_feedback if human_feedback is not None else {}
         }
 
-        #
-
-
-        dataset_list = dataset.to_list()
-
-        # Append the new entry to the list
-        dataset_list.append(new_entry)
-
-        # Create a new dataset from the updated list
-        updated_dataset = Dataset.from_list(dataset_list, features=features)
+        # Define a unique path for the new log file within the local directory
+        os.makedirs(LOCAL_LOG_DIR, exist_ok=True) # Ensure the local directory exists
+        timestamp_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")
+        log_file_path = os.path.join(LOCAL_LOG_DIR, f"log_{timestamp_str}.json")
 
-        #
-
-        logger.info("Inference data logged successfully to local cache.")
+        # Serialize the new entry to a JSON file using the custom encoder
+        with open(log_file_path, 'w', encoding='utf-8') as f:
+            json.dump(new_entry, f, cls=NumpyEncoder, indent=2)
 
-
-        updated_dataset.push_to_hub(HF_DATASET_NAME, private=True)
-        logger.info("Inference data pushed to Hugging Face Hub.")
+        logger.info(f"Inference data logged successfully to local file: {log_file_path}")
 
     except Exception as e:
-        logger.error(f"Failed to log inference data to
+        logger.error(f"Failed to log inference data to local file: {e}")
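For completeness, a sketch of how the pushed log files could be read back later. This is not part of the commit: it assumes the scheduler's default path_in_repo, so the `log_*.json` files sit at the repo root, and it needs an authenticated token because the dataset is private.

```python
# Illustrative read-back of the logged JSON entries from the dataset repo.
import json

from huggingface_hub import HfApi, hf_hub_download

REPO_ID = "aiwithoutborders-xyz/degentic_rd0"

api = HfApi()
log_files = [
    f for f in api.list_repo_files(repo_id=REPO_ID, repo_type="dataset")
    if f.startswith("log_") and f.endswith(".json")
]

entries = []
for filename in log_files:
    local_path = hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset")
    with open(local_path, "r", encoding="utf-8") as f:
        entries.append(json.load(f))

print(f"Loaded {len(entries)} logged inference entries")
```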