a1c00l committed on
Commit
153042c
·
verified ·
1 Parent(s): 9034abb

Update src/aibom_generator/utils.py

Browse files
Files changed (1) hide show
  1. src/aibom_generator/utils.py +415 -5
src/aibom_generator/utils.py CHANGED
@@ -1,16 +1,27 @@
1
  """
2
- Utility functions for the AIBOM Generator with restored field_checklist support.
3
  """
4
-
5
  import json
6
  import logging
7
  import os
8
  import re
9
  import uuid
10
- from typing import Dict, List, Optional, Any, Union
 
 
 
11
 
12
  logger = logging.getLogger(__name__)
13
 
 
 
 
 
 
 
 
 
 
14
 
15
  def setup_logging(level=logging.INFO):
16
  logging.basicConfig(
@@ -96,7 +107,289 @@ def validate_spdx(license_entry):
96
  return license_entry in spdx_licenses
97
 
98
 
99
- def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  field_checklist = {}
101
  max_scores = {
102
  "required_fields": 20,
@@ -185,7 +478,7 @@ def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
185
  (ext_score * 0.10)
186
  )
187
 
188
- return {
189
  "total_score": round(total_score, 2),
190
  "section_scores": {
191
  "required_fields": required_score,
@@ -197,6 +490,123 @@ def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
197
  "max_scores": max_scores,
198
  "field_checklist": field_checklist
199
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
 
202
  def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]:
 
1
  """
2
+ Utility functions for the AIBOM Generator.
3
  """
 
4
  import json
5
  import logging
6
  import os
7
  import re
8
  import uuid
9
+ import requests
10
+ import jsonschema
11
+ from typing import Dict, List, Optional, Any, Union, Tuple
12
+ from enum import Enum
13
 
14
  logger = logging.getLogger(__name__)
15
 
16
+ # CycloneDX schema URL for version 1.6
17
+ CYCLONEDX_SCHEMA_URL = "https://raw.githubusercontent.com/CycloneDX/specification/master/schema/bom-1.6.schema.json"
18
+
19
+ # Validation severity levels
20
class ValidationSeverity(Enum):
    """Severity level attached to a validation issue.

    The string value of a member is what gets stored under the
    ``"severity"`` key of an issue dictionary.
    """

    # The AIBOM violates a hard requirement.
    ERROR = "error"
    # The AIBOM is usable but misses something it should have.
    WARNING = "warning"
    # Purely informational hint.
    INFO = "info"
24
+
25
 
26
  def setup_logging(level=logging.INFO):
27
  logging.basicConfig(
 
107
  return license_entry in spdx_licenses
108
 
109
 
110
def _load_cyclonedx_schema():
    """
    Load the CycloneDX JSON schema from ``CYCLONEDX_SCHEMA_URL``.

    A successfully downloaded schema is cached on the function object so that
    repeated validations do not download it again. Failures are not cached,
    so a later call retries the download.

    Returns:
        dict: The loaded schema, or a minimal schema (top-level required keys
        only) if loading fails
    """
    cached = getattr(_load_cyclonedx_schema, "_cached_schema", None)
    if cached is not None:
        return cached

    try:
        # Always pass a timeout: without one, requests may wait forever on an
        # unresponsive server and stall every validation call.
        response = requests.get(CYCLONEDX_SCHEMA_URL, timeout=10)
        response.raise_for_status()
        schema = response.json()
    except Exception as e:
        # Deliberately broad: validation is best-effort and must degrade to
        # the minimal schema on any download or decoding failure.
        logger.error(f"Failed to load CycloneDX schema: {e}")
        # Fallback to a minimal schema validation
        return {
            "type": "object",
            "required": ["bomFormat", "specVersion", "serialNumber", "version"]
        }

    _load_cyclonedx_schema._cached_schema = schema
    logger.info(f"Successfully loaded CycloneDX schema from {CYCLONEDX_SCHEMA_URL}")
    return schema
130
+
131
+
132
def _validate_schema(aibom: Dict[str, Any], schema: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Validate an AIBOM against the CycloneDX JSON schema.

    Only the first schema violation is reported, because
    ``jsonschema.validate`` raises on a single error.

    Args:
        aibom: The AIBOM to validate
        schema: The CycloneDX schema; a falsy value yields a single
            ``SCHEMA_UNAVAILABLE`` warning

    Returns:
        List of validation issues (empty when the AIBOM conforms). Each issue
        is a dict with ``severity``, ``code``, ``message`` and ``path`` keys.
    """
    if not schema:
        return [{
            "severity": ValidationSeverity.WARNING.value,
            "code": "SCHEMA_UNAVAILABLE",
            "message": "CycloneDX schema unavailable, performing minimal validation",
            "path": "$"
        }]

    try:
        jsonschema.validate(instance=aibom, schema=schema)
    except jsonschema.exceptions.ValidationError as e:
        return [{
            "severity": ValidationSeverity.ERROR.value,
            "code": "SCHEMA_VALIDATION_ERROR",
            "message": str(e),
            # Location of the offending value inside the AIBOM
            "path": _format_schema_error_path(e.path)
        }]
    except Exception as e:
        # E.g. the schema itself is malformed; report instead of crashing.
        return [{
            "severity": ValidationSeverity.ERROR.value,
            "code": "SCHEMA_VALIDATION_EXCEPTION",
            "message": f"Unexpected error during schema validation: {str(e)}",
            "path": "$"
        }]

    return []


def _format_schema_error_path(parts) -> str:
    """Render path elements as ``$.key[index]``, matching the other issue paths."""
    path = "$"
    for part in parts or ():
        # Integer elements are array indices, everything else is an object key.
        path += f"[{part}]" if isinstance(part, int) else f".{part}"
    return path
175
+
176
+
177
def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Validate AI-specific requirements for an AIBOM.

    Checks are reported in this order: presence of components, disallowed
    ``url`` on metadata authors, per-component checks (type, PURL, model
    card), then metadata checks (tools, authors, properties). If there are no
    components, only ``MISSING_COMPONENTS`` is reported.

    Malformed values are reported as issues instead of raising: a non-string
    PURL counts as an invalid PURL, a non-object model card counts as an
    empty one, a non-object ``metadata`` counts as missing, and a non-object
    component yields an ``INVALID_COMPONENT`` error.

    Args:
        aibom: The AIBOM to validate

    Returns:
        List of validation issues; each is a dict with ``severity``,
        ``code``, ``message`` and ``path`` keys
    """
    issues: List[Dict[str, Any]] = []
    error = ValidationSeverity.ERROR
    warning = ValidationSeverity.WARNING
    info = ValidationSeverity.INFO

    def report(severity, code: str, message: str, path: str) -> None:
        issues.append({
            "severity": severity.value,
            "code": code,
            "message": message,
            "path": path
        })

    # Check basic structure
    components = aibom.get("components")
    if not isinstance(components, (list, tuple)) or not components:
        report(error, "MISSING_COMPONENTS",
               "AIBOM must contain at least one component", "$.components")
        return issues  # Can't continue validation without components

    # A metadata value that is not an object is treated like missing metadata.
    metadata = aibom.get("metadata")
    if not isinstance(metadata, dict):
        metadata = None

    # Check for schema compliance issues with authors
    authors = metadata.get("authors") if metadata is not None else None
    if isinstance(authors, (list, tuple)):
        for i, author in enumerate(authors):
            if isinstance(author, dict) and "url" in author:
                report(error, "INVALID_AUTHOR_PROPERTY",
                       "Author object contains 'url' property which is not allowed in CycloneDX schema. Use 'email' instead.",
                       f"$.metadata.authors[{i}].url")

    # Validate each component
    for i, component in enumerate(components):
        component_path = f"$.components[{i}]"

        if not isinstance(component, dict):
            report(error, "INVALID_COMPONENT",
                   "Component must be a JSON object", component_path)
            continue

        # Check component type
        if "type" not in component:
            report(error, "MISSING_COMPONENT_TYPE",
                   "Component must have a type", f"{component_path}.type")
        elif component["type"] != "machine-learning-model":
            report(warning, "INVALID_COMPONENT_TYPE",
                   "Component type should be 'machine-learning-model' for AI components",
                   f"{component_path}.type")

        # Check PURL format
        if "purl" not in component:
            report(error, "MISSING_PURL",
                   "Component must have a PURL", f"{component_path}.purl")
        else:
            purl = component["purl"]
            if not isinstance(purl, str) or not purl.startswith("pkg:"):
                report(error, "INVALID_PURL_FORMAT",
                       "PURL must start with 'pkg:'", f"{component_path}.purl")
            elif "huggingface" in purl and "@" not in purl and "version" in component:
                # The component declares a version but the PURL does not carry it.
                report(warning, "MISSING_VERSION_IN_PURL",
                       "PURL should include version information with '@' for versioned components",
                       f"{component_path}.purl")

        # Check model card
        if "modelCard" not in component:
            report(warning, "MISSING_MODEL_CARD",
                   "AI component should include a model card",
                   f"{component_path}.modelCard")
        else:
            model_card = component["modelCard"]
            if not isinstance(model_card, dict):
                model_card = {}
            model_card_path = f"{component_path}.modelCard"

            # Check model parameters
            if "modelParameters" not in model_card:
                report(warning, "MISSING_MODEL_PARAMETERS",
                       "Model card should include model parameters",
                       f"{model_card_path}.modelParameters")

            # Check considerations
            if "considerations" not in model_card:
                report(info, "MISSING_CONSIDERATIONS",
                       "Model card should include considerations section for ethical considerations, limitations, etc.",
                       f"{model_card_path}.considerations")

    # Validate metadata
    if metadata is None:
        report(error, "MISSING_METADATA",
               "AIBOM must contain metadata", "$.metadata")
    else:
        metadata_path = "$.metadata"

        # Check tools
        if not metadata.get("tools"):
            report(warning, "MISSING_TOOLS",
                   "Metadata should include tools that generated the AIBOM",
                   f"{metadata_path}.tools")

        # Check authors
        if not metadata.get("authors"):
            report(info, "MISSING_AUTHORS",
                   "Metadata should include authors information",
                   f"{metadata_path}.authors")

        # Check properties
        if not metadata.get("properties"):
            report(info, "MISSING_PROPERTIES",
                   "Metadata should include properties for additional information",
                   f"{metadata_path}.properties")

    return issues
325
+
326
+
327
+ def _generate_validation_recommendations(issues: List[Dict[str, Any]]) -> List[str]:
328
+ """
329
+ Generate recommendations based on validation issues.
330
+
331
+ Args:
332
+ issues: List of validation issues
333
+
334
+ Returns:
335
+ List of recommendations
336
+ """
337
+ recommendations = []
338
+
339
+ # Group issues by code
340
+ issue_codes = set(issue["code"] for issue in issues)
341
+
342
+ # Generate recommendations based on issue codes
343
+ if "MISSING_COMPONENTS" in issue_codes:
344
+ recommendations.append("Add at least one component to the AIBOM")
345
+
346
+ if "MISSING_COMPONENT_TYPE" in issue_codes or "INVALID_COMPONENT_TYPE" in issue_codes:
347
+ recommendations.append("Ensure all AI components have type 'machine-learning-model'")
348
+
349
+ if "MISSING_PURL" in issue_codes or "INVALID_PURL_FORMAT" in issue_codes:
350
+ recommendations.append("Ensure all components have a valid PURL starting with 'pkg:'")
351
+
352
+ if "MISSING_VERSION_IN_PURL" in issue_codes:
353
+ recommendations.append("Include version information in PURLs using '@' syntax (e.g., pkg:huggingface/org/model@version)")
354
+
355
+ if "MISSING_MODEL_CARD" in issue_codes:
356
+ recommendations.append("Add a model card section to AI components")
357
+
358
+ if "MISSING_MODEL_PARAMETERS" in issue_codes:
359
+ recommendations.append("Include model parameters in the model card section")
360
+
361
+ if "MISSING_CONSIDERATIONS" in issue_codes:
362
+ recommendations.append("Add ethical considerations, limitations, and risks to the model card")
363
+
364
+ if "MISSING_METADATA" in issue_codes:
365
+ recommendations.append("Add metadata section to the AIBOM")
366
+
367
+ if "MISSING_TOOLS" in issue_codes:
368
+ recommendations.append("Include tools information in the metadata section")
369
+
370
+ if "MISSING_AUTHORS" in issue_codes:
371
+ recommendations.append("Add authors information to the metadata section")
372
+
373
+ if "MISSING_PROPERTIES" in issue_codes:
374
+ recommendations.append("Include additional properties in the metadata section")
375
+
376
+ if "INVALID_AUTHOR_PROPERTY" in issue_codes:
377
+ recommendations.append("Remove 'url' property from author objects and use 'email' instead to comply with CycloneDX schema")
378
+
379
+ return recommendations
380
+
381
+
382
+ def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True) -> Dict[str, Any]:
383
+ """
384
+ Calculate completeness score for an AIBOM and optionally validate against CycloneDX schema.
385
+
386
+ Args:
387
+ aibom: The AIBOM to score and validate
388
+ validate: Whether to perform validation against CycloneDX schema
389
+
390
+ Returns:
391
+ Dictionary containing score and validation results
392
+ """
393
  field_checklist = {}
394
  max_scores = {
395
  "required_fields": 20,
 
478
  (ext_score * 0.10)
479
  )
480
 
481
+ result = {
482
  "total_score": round(total_score, 2),
483
  "section_scores": {
484
  "required_fields": required_score,
 
490
  "max_scores": max_scores,
491
  "field_checklist": field_checklist
492
  }
493
+
494
+ # Add validation if requested
495
+ if validate:
496
+ validation_result = validate_aibom(aibom)
497
+ result["validation"] = validation_result
498
+
499
+ # Adjust score based on validation results
500
+ if not validation_result["valid"]:
501
+ # Count errors and warnings
502
+ error_count = validation_result["summary"]["error_count"]
503
+ warning_count = validation_result["summary"]["warning_count"]
504
+
505
+ # Apply penalties to the score
506
+ if error_count > 0:
507
+ # Severe penalty for errors (up to 50% reduction)
508
+ error_penalty = min(0.5, error_count * 0.1)
509
+ result["total_score"] = round(result["total_score"] * (1 - error_penalty), 2)
510
+ result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
511
+ elif warning_count > 0:
512
+ # Minor penalty for warnings (up to 20% reduction)
513
+ warning_penalty = min(0.2, warning_count * 0.05)
514
+ result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 2)
515
+ result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"
516
+
517
+ return result
518
+
519
+
520
def validate_aibom(aibom: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate an AIBOM against the CycloneDX schema and AI-specific requirements.

    Any issue, whatever its severity, marks the corresponding ``*_valid``
    flag and the overall ``valid`` flag as False.

    Args:
        aibom: The AIBOM to validate

    Returns:
        Validation report with the keys ``valid``, ``schema_valid``,
        ``ai_valid``, ``issues``, ``recommendations`` and ``summary``
        (error/warning/info counts)
    """
    # Schema check first, then the AI-specific checks
    schema_issues = _validate_schema(aibom, _load_cyclonedx_schema())
    ai_issues = _validate_ai_requirements(aibom)
    all_issues = [*schema_issues, *ai_issues]

    def count_with(severity: ValidationSeverity) -> int:
        return sum(1 for issue in all_issues if issue["severity"] == severity.value)

    return {
        "valid": not all_issues,
        "schema_valid": not schema_issues,
        "ai_valid": not ai_issues,
        "issues": all_issues,
        "recommendations": _generate_validation_recommendations(all_issues),
        "summary": {
            "error_count": count_with(ValidationSeverity.ERROR),
            "warning_count": count_with(ValidationSeverity.WARNING),
            "info_count": count_with(ValidationSeverity.INFO),
        },
    }
574
+
575
+
576
def get_validation_summary(report: Dict[str, Any]) -> str:
    """
    Get a human-readable summary of the validation report.

    The text has a status line and the error/warning/info counts. For an
    invalid report it also has one line per issue and a numbered list of
    recommendations.

    Args:
        report: Validation report with ``valid``, ``summary``, ``issues``
            and ``recommendations`` keys

    Returns:
        Human-readable summary, terminated by a newline
    """
    is_valid = report["valid"]
    if is_valid:
        lines = ["✅ AIBOM is valid and complies with CycloneDX schema and AI requirements."]
    else:
        lines = ["❌ AIBOM validation failed."]

    counts = report["summary"]
    lines += [
        "",
        "Summary:",
        f"- Errors: {counts['error_count']}",
        f"- Warnings: {counts['warning_count']}",
        f"- Info: {counts['info_count']}",
    ]

    if not is_valid:
        lines += ["", "Issues:"]
        lines.extend(
            f"- [{issue['severity'].upper()}] {issue['code']}: {issue['message']} (at {issue['path']})"
            for issue in report["issues"]
        )

        lines += ["", "Recommendations:"]
        lines.extend(
            f"{number}. {recommendation}"
            for number, recommendation in enumerate(report["recommendations"], 1)
        )

    # Collect lines and join once instead of repeated string concatenation
    return "\n".join(lines) + "\n"
610
 
611
 
612
  def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]: