Update src/aibom_generator/generator.py

src/aibom_generator/generator.py  CHANGED  (+77 -238)
Before (removed lines are prefixed with -):

@@ -1,23 +1,12 @@
-"""
-Core functionality for generating CycloneDX AIBOMs from Hugging Face models.
-"""
-
 import json
 import uuid
 import datetime
-from typing import Dict,
 
-from huggingface_hub import HfApi, ModelCard
 
 
 class AIBOMGenerator:
-    """
-    Generator for AI Bills of Materials (AIBOMs) in CycloneDX format.
-
-    This class provides functionality to generate CycloneDX 1.6 compliant
-    AIBOMs for machine learning models hosted on the Hugging Face Hub.
-    """
-
     def __init__(
         self,
         hf_token: Optional[str] = None,
@@ -25,93 +14,42 @@ class AIBOMGenerator:
         use_inference: bool = True,
         cache_dir: Optional[str] = None,
     ):
-        """
-        Initialize the AIBOM Generator.
-
-        Args:
-            hf_token: Hugging Face API token for accessing private models
-            inference_model_url: URL of the inference model service for extracting
-                metadata from unstructured text
-            use_inference: Whether to use the inference model for metadata extraction
-            cache_dir: Directory to cache API responses and model cards
-        """
         self.hf_api = HfApi(token=hf_token)
         self.inference_model_url = inference_model_url
         self.use_inference = use_inference
         self.cache_dir = cache_dir
-
     def generate_aibom(
         self,
         model_id: str,
         output_file: Optional[str] = None,
         include_inference: Optional[bool] = None,
     ) -> Dict[str, Any]:
-        """
-        Generate a CycloneDX AIBOM for the specified Hugging Face model.
-
-        Args:
-            model_id: The Hugging Face model ID (e.g., "google/bert-base-uncased")
-            output_file: Optional path to save the generated AIBOM
-            include_inference: Override the default inference model usage setting
-
-        Returns:
-            The generated AIBOM as a dictionary
-        """
-        # Determine whether to use inference
         use_inference = include_inference if include_inference is not None else self.use_inference
-
-        # Fetch model information
         model_info = self._fetch_model_info(model_id)
         model_card = self._fetch_model_card(model_id)
-
-        # Generate the AIBOM
         aibom = self._create_aibom_structure(model_id, model_info, model_card, use_inference)
-
-        # Save to file if requested
         if output_file:
             with open(output_file, 'w') as f:
                 json.dump(aibom, f, indent=2)
-
         return aibom
-
     def _fetch_model_info(self, model_id: str) -> Dict[str, Any]:
-        """
-        Fetch model information from the Hugging Face API.
-
-        Args:
-            model_id: The Hugging Face model ID
-
-        Returns:
-            Model information as a dictionary
-        """
-        # TODO: Implement caching
         try:
-            model_info = self.hf_api.model_info(model_id)
-            return model_info
         except Exception as e:
-            # Log the error and return empty dict
             print(f"Error fetching model info for {model_id}: {e}")
             return {}
-
     def _fetch_model_card(self, model_id: str) -> Optional[ModelCard]:
-        """
-        Fetch the model card for the specified model.
-
-        Args:
-            model_id: The Hugging Face model ID
-
-        Returns:
-            ModelCard object if available, None otherwise
-        """
-        # TODO: Implement caching
         try:
-            model_card = ModelCard.load(model_id)
-            return model_card
         except Exception as e:
-            # Log the error and return None
             print(f"Error fetching model card for {model_id}: {e}")
             return None
-
     def _create_aibom_structure(
         self,
         model_id: str,
@@ -119,28 +57,12 @@ class AIBOMGenerator:
         model_card: Optional[ModelCard],
         use_inference: bool,
     ) -> Dict[str, Any]:
-        """
-        Create the CycloneDX AIBOM structure.
-
-        Args:
-            model_id: The Hugging Face model ID
-            model_info: Model information from the API
-            model_card: ModelCard object if available
-            use_inference: Whether to use inference for metadata extraction
-
-        Returns:
-            CycloneDX AIBOM as a dictionary
-        """
-        # Extract structured metadata
         metadata = self._extract_structured_metadata(model_id, model_info, model_card)
-
-        # Extract unstructured metadata if requested and available
         if use_inference and model_card and self.inference_model_url:
             unstructured_metadata = self._extract_unstructured_metadata(model_card)
-            # Merge with structured metadata, giving priority to structured
             metadata = {**unstructured_metadata, **metadata}
-
-        # Create the AIBOM structure
         aibom = {
             "bomFormat": "CycloneDX",
             "specVersion": "1.6",
@@ -148,34 +70,24 @@
             "version": 1,
             "metadata": self._create_metadata_section(model_id, metadata),
             "components": [self._create_component_section(model_id, metadata)],
         }
-
-        # Add external references if available
-        if "external_references" in metadata:
-            aibom["externalReferences"] = metadata["external_references"]
-
         return aibom
-
     def _extract_structured_metadata(
         self,
         model_id: str,
         model_info: Dict[str, Any],
         model_card: Optional[ModelCard],
     ) -> Dict[str, Any]:
-        """
-        Extract structured metadata from model info and model card.
-
-        Args:
-            model_id: The Hugging Face model ID
-            model_info: Model information from the API
-            model_card: ModelCard object if available
-
-        Returns:
-            Structured metadata as a dictionary
-        """
         metadata = {}
-
-        # Extract from model_info
         if model_info:
             metadata.update({
                 "name": model_info.modelId.split("/")[-1] if hasattr(model_info, "modelId") else model_id.split("/")[-1],
@@ -185,12 +97,9 @@ class AIBOMGenerator:
                 "downloads": model_info.downloads if hasattr(model_info, "downloads") else 0,
                 "last_modified": model_info.lastModified if hasattr(model_info, "lastModified") else None,
             })
-
-        # Extract from model_card
         if model_card and model_card.data:
             card_data = model_card.data.to_dict() if hasattr(model_card.data, "to_dict") else {}
-
-            # Map card data to metadata
             metadata.update({
                 "language": card_data.get("language"),
                 "license": card_data.get("license"),
@@ -200,189 +109,119 @@ class AIBOMGenerator:
                 "model_name": card_data.get("model_name"),
                 "tags": card_data.get("tags", metadata.get("tags", [])),
             })
-
-            # Extract evaluation results if available
             if hasattr(model_card.data, "eval_results") and model_card.data.eval_results:
                 metadata["eval_results"] = model_card.data.eval_results
-
         return {k: v for k, v in metadata.items() if v is not None}
-
     def _extract_unstructured_metadata(self, model_card: ModelCard) -> Dict[str, Any]:
-        """
-        Extract metadata from unstructured text in the model card.
-
-        Args:
-            model_card: ModelCard object
-
-        Returns:
-            Extracted metadata as a dictionary
-        """
-        # TODO: Implement inference model integration
-        # This is a placeholder that will be replaced with actual inference model calls
-        return {}
-
     def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Create the metadata section of the CycloneDX AIBOM.
-
-        Args:
-            model_id: The Hugging Face model ID
-            metadata: Extracted metadata
-
-        Returns:
-            Metadata section as a dictionary
-        """
-        # Create timestamp
         timestamp = datetime.datetime.utcnow().isoformat() + "Z"
-
-        # Create tools section
         tools = [{
             "vendor": "Aetheris AI",
             "name": "aibom-generator",
-            "version":
         }]
-
-        # Create authors section
         authors = []
         if "author" in metadata and metadata["author"]:
             authors.append({
                 "name": metadata["author"],
                 "url": f"https://huggingface.co/{metadata['author']}"
             })
-
-        # Create component section (reference to the main component)
         component = {
             "type": "machine-learning-model",
             "name": metadata.get("name", model_id.split("/")[-1]),
-            "bom-ref": f"pkg:
         }
-
-        # Create properties section
         properties = []
         for key, value in metadata.items():
             if key not in ["name", "author", "license"] and value is not None:
                 if isinstance(value, (list, dict)):
                     value = json.dumps(value)
-                properties.append({
-                    "name": key,
-                    "value": str(value)
-                })
-
-        # Assemble metadata section
         metadata_section = {
             "timestamp": timestamp,
             "tools": tools,
         }
-
         if authors:
             metadata_section["authors"] = authors
-
-        if component:
-            metadata_section["component"] = component
-
         if properties:
             metadata_section["properties"] = properties
-
         return metadata_section
-
     def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Create the component section of the CycloneDX AIBOM.
-
-        Args:
-            model_id: The Hugging Face model ID
-            metadata: Extracted metadata
-
-        Returns:
-            Component section as a dictionary
-        """
-        # Create basic component information
         component = {
             "type": "machine-learning-model",
-            "bom-ref": f"pkg:
             "name": metadata.get("name", model_id.split("/")[-1]),
-            "purl": f"pkg:
         }
-
-        # Add description if available
         if "description" in metadata:
             component["description"] = metadata["description"]
-
-        # Add version if available
         if "version" in metadata:
             component["version"] = metadata["version"]
-
-        # Add license if available
         if "license" in metadata:
-            component["licenses"] = [{
-                "license": {
-                    "id": metadata["license"]
-                }
-            }]
-
-        # Add external references
-        component["externalReferences"] = [
-            {
-                "type": "website",
-                "url": f"https://huggingface.co/{model_id}"
-            }
-        ]
-
-        # Add model card section
         component["modelCard"] = self._create_model_card_section(metadata)
-
         return component
-
     def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Create the modelCard section of the component.
-
-        Args:
-            metadata: Extracted metadata
-
-        Returns:
-            ModelCard section as a dictionary
-        """
         model_card_section = {}
-
-        # Add model parameters if available
-        model_parameters = {}
-        for param in ["base_model", "library_name", "pipeline_tag"]:
-            if param in metadata and metadata[param]:
-                model_parameters[param] = metadata[param]
-
         if model_parameters:
             model_card_section["modelParameters"] = model_parameters
-
-        # Add quantitative analysis if available
         if "eval_results" in metadata:
-            model_card_section["quantitativeAnalysis"] = {
-                "performanceMetrics": metadata["eval_results"]
-            }
-
-        # Add considerations if available
         considerations = {}
-        for key in ["limitations", "ethical_considerations", "bias", "risks"]:
-            if key in metadata and metadata[key]:
-                considerations[key] = metadata[key]
-
         if considerations:
             model_card_section["considerations"] = considerations
-
-        # Add properties if available
         properties = []
         for key, value in metadata.items():
-            if key not in ["name", "author", "license", "base_model", "library_name",
-                           "pipeline_tag", "eval_results", "limitations",
-                           "ethical_considerations", "bias", "risks"] and value is not None:
                 if isinstance(value, (list, dict)):
                     value = json.dumps(value)
-                properties.append({
-                    "name": key,
-                    "value": str(value)
-                })
-
         if properties:
             model_card_section["properties"] = properties
-
         return model_card_section
After (added lines are prefixed with +):

@@ -1,23 +1,12 @@
 import json
 import uuid
 import datetime
+from typing import Dict, Optional, Any
 
+from huggingface_hub import HfApi, ModelCard
 
 
 class AIBOMGenerator:
     def __init__(
         self,
         hf_token: Optional[str] = None,
@@ -25,93 +14,42 @@ class AIBOMGenerator:
         use_inference: bool = True,
         cache_dir: Optional[str] = None,
     ):
         self.hf_api = HfApi(token=hf_token)
         self.inference_model_url = inference_model_url
         self.use_inference = use_inference
         self.cache_dir = cache_dir
+
     def generate_aibom(
         self,
         model_id: str,
         output_file: Optional[str] = None,
         include_inference: Optional[bool] = None,
     ) -> Dict[str, Any]:
         use_inference = include_inference if include_inference is not None else self.use_inference
         model_info = self._fetch_model_info(model_id)
         model_card = self._fetch_model_card(model_id)
         aibom = self._create_aibom_structure(model_id, model_info, model_card, use_inference)
+
         if output_file:
             with open(output_file, 'w') as f:
                 json.dump(aibom, f, indent=2)
+
         return aibom
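For reviewers who want to try the new entry point, a minimal usage sketch (the import path assumes the package is importable as aibom_generator, and the model id is just an example):

    from aibom_generator.generator import AIBOMGenerator

    # Generate without the inference service and write the result to disk.
    generator = AIBOMGenerator(use_inference=False)
    aibom = generator.generate_aibom("deepseek-ai/DeepSeek-R1", output_file="aibom.json")
    print(aibom["bomFormat"], aibom["specVersion"])  # -> CycloneDX 1.6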
+
     def _fetch_model_info(self, model_id: str) -> Dict[str, Any]:
         try:
+            return self.hf_api.model_info(model_id)
         except Exception as e:
             print(f"Error fetching model info for {model_id}: {e}")
             return {}
+
     def _fetch_model_card(self, model_id: str) -> Optional[ModelCard]:
         try:
+            return ModelCard.load(model_id)
         except Exception as e:
             print(f"Error fetching model card for {model_id}: {e}")
             return None
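Note that HfApi.model_info() actually returns a huggingface_hub ModelInfo object on success rather than the Dict the annotation promises, which is why the extraction code downstream probes it with hasattr(...). Both helpers swallow lookup failures, so callers always get a usable value; a quick sanity sketch (the repo id is a deliberately nonexistent placeholder):

    gen = AIBOMGenerator()
    # Failed lookups fall back to an empty dict / None instead of raising.
    assert gen._fetch_model_info("nonexistent-org/nonexistent-model") == {}
    assert gen._fetch_model_card("nonexistent-org/nonexistent-model") is None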
+
     def _create_aibom_structure(
         self,
         model_id: str,
@@ -119,28 +57,12 @@ class AIBOMGenerator:
         model_card: Optional[ModelCard],
         use_inference: bool,
     ) -> Dict[str, Any]:
         metadata = self._extract_structured_metadata(model_id, model_info, model_card)
+
         if use_inference and model_card and self.inference_model_url:
             unstructured_metadata = self._extract_unstructured_metadata(model_card)
             metadata = {**unstructured_metadata, **metadata}
+
         aibom = {
             "bomFormat": "CycloneDX",
             "specVersion": "1.6",
@@ -148,34 +70,24 @@
             "version": 1,
             "metadata": self._create_metadata_section(model_id, metadata),
             "components": [self._create_component_section(model_id, metadata)],
+            "dependencies": [
+                {
+                    "ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
+                    "dependsOn": ["pkg:pypi/[email protected]"]
+                }
+            ]
         }
+
         return aibom
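The merge line keeps the behavior the removed comment described ("giving priority to structured"): in a double-splat merge the right-hand mapping wins, so structured metadata overrides inference-derived values. A one-line illustration:

    # Later keys win in {**a, **b}, so the structured value survives.
    merged = {**{"license": "guessed-by-inference"}, **{"license": "apache-2.0"}}
    assert merged["license"] == "apache-2.0"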
+
     def _extract_structured_metadata(
         self,
         model_id: str,
         model_info: Dict[str, Any],
         model_card: Optional[ModelCard],
     ) -> Dict[str, Any]:
         metadata = {}
+
         if model_info:
             metadata.update({
                 "name": model_info.modelId.split("/")[-1] if hasattr(model_info, "modelId") else model_id.split("/")[-1],
@@ -185,12 +97,9 @@ class AIBOMGenerator:
                 "downloads": model_info.downloads if hasattr(model_info, "downloads") else 0,
                 "last_modified": model_info.lastModified if hasattr(model_info, "lastModified") else None,
             })
+
         if model_card and model_card.data:
             card_data = model_card.data.to_dict() if hasattr(model_card.data, "to_dict") else {}
             metadata.update({
                 "language": card_data.get("language"),
                 "license": card_data.get("license"),
@@ -200,189 +109,119 @@ class AIBOMGenerator:
                 "model_name": card_data.get("model_name"),
                 "tags": card_data.get("tags", metadata.get("tags", [])),
             })
             if hasattr(model_card.data, "eval_results") and model_card.data.eval_results:
                 metadata["eval_results"] = model_card.data.eval_results
+
+        # AI-specific fields (manually added or inferred)
+        metadata["ai:type"] = "Transformer"
+        metadata["ai:task"] = metadata.get("pipeline_tag", "Text Generation")
+        metadata["ai:framework"] = "PyTorch" if "transformers" in metadata.get("library_name", "") else "Unknown"
+
+        if "DeepSeek-R1" in model_id:
+            metadata.update({
+                "ai:parameters": "672B total, 37B active per token",
+                "ai:training-data": "14.8 trillion tokens",
+                "ai:training-duration": "55 days",
+                "ai:training-cost": "$5.58 million",
+                "ai:hardware": "NVIDIA H800 GPUs"
+            })
+
         return {k: v for k, v in metadata.items() if v is not None}
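The new "ai:*" keys are not standard CycloneDX fields; they survive the None-filter above and are flattened by the generic key/value loops further down into properties entries such as:

    {"name": "ai:training-data", "value": "14.8 trillion tokens"}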
+
     def _extract_unstructured_metadata(self, model_card: ModelCard) -> Dict[str, Any]:
+        return {}  # Placeholder for inference model integration
+
     def _create_metadata_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
         timestamp = datetime.datetime.utcnow().isoformat() + "Z"
         tools = [{
             "vendor": "Aetheris AI",
             "name": "aibom-generator",
+            "version": "0.1.0"
         }]
+
         authors = []
         if "author" in metadata and metadata["author"]:
             authors.append({
                 "name": metadata["author"],
                 "url": f"https://huggingface.co/{metadata['author']}"
             })
+
         component = {
             "type": "machine-learning-model",
             "name": metadata.get("name", model_id.split("/")[-1]),
+            "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}"
         }
+
         properties = []
         for key, value in metadata.items():
             if key not in ["name", "author", "license"] and value is not None:
                 if isinstance(value, (list, dict)):
                     value = json.dumps(value)
+                properties.append({"name": key, "value": str(value)})
+
         metadata_section = {
             "timestamp": timestamp,
             "tools": tools,
+            "component": component
         }
+
         if authors:
             metadata_section["authors"] = authors
         if properties:
             metadata_section["properties"] = properties
+
         return metadata_section
+
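One encoding detail worth calling out: the "/" in a Hugging Face model id is percent-encoded, so the bom-ref here, the purl in the component section, and the dependency ref in _create_aibom_structure all produce the same string:

    model_id = "deepseek-ai/DeepSeek-R1"
    print(f"pkg:generic/{model_id.replace('/', '%2F')}")
    # pkg:generic/deepseek-ai%2FDeepSeek-R1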
     def _create_component_section(self, model_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
         component = {
             "type": "machine-learning-model",
+            "bom-ref": f"pkg:generic/{model_id.replace('/', '%2F')}",
             "name": metadata.get("name", model_id.split("/")[-1]),
+            "purl": f"pkg:generic/{model_id.replace('/', '%2F')}"
         }
+
         if "description" in metadata:
             component["description"] = metadata["description"]
+
         if "version" in metadata:
             component["version"] = metadata["version"]
+
         if "license" in metadata:
+            component["licenses"] = [{"license": {"id": metadata["license"]}}]
+
+        component["externalReferences"] = [{
+            "type": "website",
+            "url": f"https://huggingface.co/{model_id}"
+        }]
+
         component["modelCard"] = self._create_model_card_section(metadata)
+
         return component
+
     def _create_model_card_section(self, metadata: Dict[str, Any]) -> Dict[str, Any]:
         model_card_section = {}
+        model_parameters = {k: metadata[k] for k in ["base_model", "library_name", "pipeline_tag"] if k in metadata}
         if model_parameters:
             model_card_section["modelParameters"] = model_parameters
+
         if "eval_results" in metadata:
+            model_card_section["quantitativeAnalysis"] = {"performanceMetrics": metadata["eval_results"]}
+
         considerations = {}
+        for k in ["limitations", "ethical_considerations", "bias", "risks"]:
+            if k in metadata:
+                considerations[k] = metadata[k]
         if considerations:
             model_card_section["considerations"] = considerations
+
         properties = []
         for key, value in metadata.items():
+            if key not in ["name", "author", "license", "base_model", "library_name", "pipeline_tag", "eval_results", "limitations", "ethical_considerations", "bias", "risks"]:
                 if isinstance(value, (list, dict)):
                     value = json.dumps(value)
+                properties.append({"name": key, "value": str(value)})
+
         if properties:
             model_card_section["properties"] = properties
+
         return model_card_section
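For reference, an abridged sketch of the document this version emits; field values are illustrative, not taken from a real run, and optional sections (authors, description, licenses) appear only when the metadata supplies them:

    {
      "bomFormat": "CycloneDX",
      "specVersion": "1.6",
      "version": 1,
      "metadata": {
        "timestamp": "2025-01-01T00:00:00Z",
        "tools": [{"vendor": "Aetheris AI", "name": "aibom-generator", "version": "0.1.0"}],
        "component": {"type": "machine-learning-model", "name": "DeepSeek-R1", "bom-ref": "pkg:generic/deepseek-ai%2FDeepSeek-R1"},
        "properties": [{"name": "ai:type", "value": "Transformer"}]
      },
      "components": [{"type": "machine-learning-model", "name": "DeepSeek-R1", "purl": "pkg:generic/deepseek-ai%2FDeepSeek-R1", "modelCard": {}}],
      "dependencies": [{"ref": "pkg:generic/deepseek-ai%2FDeepSeek-R1", "dependsOn": ["pkg:pypi/[email protected]"]}]
    }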