a1c00l committed
Commit 240512e · verified · 1 Parent(s): d05fd64

Update src/aibom_generator/utils.py

Files changed (1):
  1. src/aibom_generator/utils.py +443 -1
src/aibom_generator/utils.py CHANGED
@@ -18,6 +18,109 @@ class ValidationSeverity(Enum):
     WARNING = "warning"
     INFO = "info"

+# Field classification based on documentation value (silently aligned with SPDX)
+FIELD_CLASSIFICATION = {
+    # Critical fields (silently aligned with SPDX mandatory fields)
+    "bomFormat": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "specVersion": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "serialNumber": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "version": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "buildTime": {"tier": "critical", "weight": 4, "category": "required_fields"},
+    "releaseTime": {"tier": "critical", "weight": 4, "category": "required_fields"},
+    "name": {"tier": "critical", "weight": 4, "category": "component_basic"},
+    "downloadLocation": {"tier": "critical", "weight": 4, "category": "external_references"},
+    "primaryPurpose": {"tier": "critical", "weight": 3, "category": "metadata"},
+    "suppliedBy": {"tier": "critical", "weight": 4, "category": "metadata"},
+
+    # Important fields (aligned with key SPDX optional fields)
+    "type": {"tier": "important", "weight": 2, "category": "component_basic"},
+    "purl": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "description": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "licenses": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "energyConsumption": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "hyperparameter": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "limitation": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "safetyRiskAssessment": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "typeOfModel": {"tier": "important", "weight": 3, "category": "component_model_card"},
+
+    # Supplementary fields (aligned with remaining SPDX optional fields)
+    "modelExplainability": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "standardCompliance": {"tier": "supplementary", "weight": 2, "category": "metadata"},
+    "domain": {"tier": "supplementary", "weight": 2, "category": "metadata"},
+    "energyQuantity": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "energyUnit": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "informationAboutTraining": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "informationAboutApplication": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "metric": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "metricDecisionThreshold": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "modelDataPreprocessing": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "autonomyType": {"tier": "supplementary", "weight": 1, "category": "metadata"},
+    "useSensitivePersonalInformation": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}
+}
+
+# Completeness profiles (silently aligned with SPDX requirements)
+COMPLETENESS_PROFILES = {
+    "basic": {
+        "description": "Minimal fields required for identification",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name"],
+        "minimum_score": 40
+    },
+    "standard": {
+        "description": "Comprehensive fields for proper documentation",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                            "buildTime", "releaseTime", "downloadLocation", "primaryPurpose", "suppliedBy"],
+        "minimum_score": 70
+    },
+    "advanced": {
+        "description": "Extensive documentation for maximum transparency",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                            "buildTime", "releaseTime", "downloadLocation", "primaryPurpose", "suppliedBy",
+                            "type", "purl", "description", "licenses", "hyperparameter", "limitation",
+                            "energyConsumption", "safetyRiskAssessment", "typeOfModel"],
+        "minimum_score": 85
+    }
+}
+
+# Validation messages framed as best practices
+VALIDATION_MESSAGES = {
+    "buildTime": {
+        "missing": "Missing critical field: buildTime - required for comprehensive version tracking",
+        "recommendation": "Add build timestamp to enable proper versioning and reproducibility"
+    },
+    "releaseTime": {
+        "missing": "Missing critical field: releaseTime - important for tracking release history",
+        "recommendation": "Add release timestamp to document when this version was released"
+    },
+    "name": {
+        "missing": "Missing critical field: name - essential for model identification",
+        "recommendation": "Add a descriptive name for the model"
+    },
+    "downloadLocation": {
+        "missing": "Missing critical field: downloadLocation - needed for artifact retrieval",
+        "recommendation": "Add information about where the model can be downloaded"
+    },
+    "primaryPurpose": {
+        "missing": "Missing critical field: primaryPurpose - important for understanding model intent",
+        "recommendation": "Add information about the primary purpose of this model"
+    },
+    "suppliedBy": {
+        "missing": "Missing critical field: suppliedBy - needed for provenance tracking",
+        "recommendation": "Add information about who supplied this model"
+    },
+    "energyConsumption": {
+        "missing": "Missing important field: energyConsumption - helpful for environmental impact assessment",
+        "recommendation": "Consider documenting energy consumption metrics for better transparency"
+    },
+    "hyperparameter": {
+        "missing": "Missing important field: hyperparameter - valuable for reproducibility",
+        "recommendation": "Document key hyperparameters used in training"
+    },
+    "limitation": {
+        "missing": "Missing important field: limitation - important for responsible use",
+        "recommendation": "Document known limitations of the model to guide appropriate usage"
+    }
+}
+

 def setup_logging(level=logging.INFO):
     logging.basicConfig(
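The tables above are plain module-level constants, so they can be inspected directly. A minimal sketch (not part of the commit; the import path is an assumption based on the src/ layout of this file):

    # Sketch only - assumes the package imports as aibom_generator.utils.
    from aibom_generator.utils import FIELD_CLASSIFICATION, COMPLETENESS_PROFILES

    # Group field names by tier to see how the weights are distributed.
    by_tier = {}
    for field, meta in FIELD_CLASSIFICATION.items():
        by_tier.setdefault(meta["tier"], []).append(field)

    for tier, fields in sorted(by_tier.items()):
        print(f"{tier}: {len(fields)} fields")

    # The profiles are cumulative: each profile's required_fields is a
    # superset of the previous profile's list.
    for name, profile in COMPLETENESS_PROFILES.items():
        print(f"{name}: min score {profile['minimum_score']}, "
              f"{len(profile['required_fields'])} required fields")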
 
@@ -103,6 +206,233 @@ def validate_spdx(license_entry):
     return license_entry in spdx_licenses


+def check_field_in_aibom(aibom: Dict[str, Any], field: str) -> bool:
+    """
+    Check if a field is present in the AIBOM.
+
+    Args:
+        aibom: The AIBOM to check
+        field: The field name to check
+
+    Returns:
+        True if the field is present, False otherwise
+    """
+    # Check in root level
+    if field in aibom:
+        return True
+
+    # Check in metadata
+    if "metadata" in aibom:
+        metadata = aibom["metadata"]
+        if field in metadata:
+            return True
+
+        # Check in metadata properties
+        if "properties" in metadata:
+            for prop in metadata["properties"]:
+                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                    return True
+
+    # Check in components
+    if "components" in aibom and aibom["components"]:
+        component = aibom["components"][0]  # Use first component
+
+        if field in component:
+            return True
+
+        # Check in component properties
+        if "properties" in component:
+            for prop in component["properties"]:
+                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                    return True
+
+        # Check in model card
+        if "modelCard" in component:
+            model_card = component["modelCard"]
+
+            if field in model_card:
+                return True
+
+            # Check in model parameters
+            if "modelParameters" in model_card:
+                if field in model_card["modelParameters"]:
+                    return True
+
+                # Check in model parameters properties
+                if "properties" in model_card["modelParameters"]:
+                    for prop in model_card["modelParameters"]["properties"]:
+                        if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                            return True
+
+            # Check in considerations
+            if "considerations" in model_card:
+                if field in model_card["considerations"]:
+                    return True
+
+                # Check in specific consideration sections
+                for section in ["technicalLimitations", "ethicalConsiderations", "environmentalConsiderations"]:
+                    if section in model_card["considerations"]:
+                        if field == "limitation" and section == "technicalLimitations":
+                            return True
+                        if field == "safetyRiskAssessment" and section == "ethicalConsiderations":
+                            return True
+                        if field == "energyConsumption" and section == "environmentalConsiderations":
+                            return True
+
+    # Check in external references
+    if field == "downloadLocation" and "externalReferences" in aibom:
+        for ref in aibom["externalReferences"]:
+            if ref.get("type") == "distribution":
+                return True
+
+    return False
+
+
+def determine_completeness_profile(aibom: Dict[str, Any], score: float) -> Dict[str, Any]:
+    """
+    Determine which completeness profile the AIBOM satisfies.
+
+    Args:
+        aibom: The AIBOM to check
+        score: The calculated score
+
+    Returns:
+        Dictionary with profile information
+    """
+    satisfied_profiles = []
+
+    for profile_name, profile in COMPLETENESS_PROFILES.items():
+        # Check if all required fields are present
+        all_required_present = all(check_field_in_aibom(aibom, field) for field in profile["required_fields"])
+
+        # Check if score meets minimum
+        score_sufficient = score >= profile["minimum_score"]
+
+        if all_required_present and score_sufficient:
+            satisfied_profiles.append(profile_name)
+
+    # Return the highest satisfied profile
+    if "advanced" in satisfied_profiles:
+        return {
+            "name": "advanced",
+            "description": COMPLETENESS_PROFILES["advanced"]["description"],
+            "satisfied": True
+        }
+    elif "standard" in satisfied_profiles:
+        return {
+            "name": "standard",
+            "description": COMPLETENESS_PROFILES["standard"]["description"],
+            "satisfied": True
+        }
+    elif "basic" in satisfied_profiles:
+        return {
+            "name": "basic",
+            "description": COMPLETENESS_PROFILES["basic"]["description"],
+            "satisfied": True
+        }
+    else:
+        return {
+            "name": "incomplete",
+            "description": "Does not satisfy any completeness profile",
+            "satisfied": False
+        }
+
+
+def apply_completeness_penalties(original_score: float, missing_fields: Dict[str, List[str]]) -> Dict[str, Any]:
+    """
+    Apply penalties based on missing critical fields.
+
+    Args:
+        original_score: The original calculated score
+        missing_fields: Dictionary of missing fields by tier
+
+    Returns:
+        Dictionary with penalty information
+    """
+    # Count missing fields by tier
+    missing_critical_count = len(missing_fields["critical"])
+    missing_important_count = len(missing_fields["important"])
+
+    # Calculate penalty based on missing critical fields
+    if missing_critical_count > 3:
+        penalty_factor = 0.8  # 20% penalty
+        penalty_reason = "Multiple critical fields missing"
+    elif missing_critical_count > 0:
+        penalty_factor = 0.9  # 10% penalty
+        penalty_reason = "Some critical fields missing"
+    elif missing_important_count > 5:
+        penalty_factor = 0.95  # 5% penalty
+        penalty_reason = "Several important fields missing"
+    else:
+        # No penalty
+        penalty_factor = 1.0
+        penalty_reason = None
+
+    adjusted_score = original_score * penalty_factor
+
+    return {
+        "adjusted_score": round(adjusted_score, 2),
+        "penalty_applied": penalty_reason is not None,
+        "penalty_reason": penalty_reason,
+        "penalty_factor": penalty_factor
+    }
+
+
+def generate_field_recommendations(missing_fields: Dict[str, List[str]]) -> List[Dict[str, Any]]:
+    """
+    Generate recommendations for missing fields.
+
+    Args:
+        missing_fields: Dictionary of missing fields by tier
+
+    Returns:
+        List of recommendations
+    """
+    recommendations = []
+
+    # Prioritize critical fields
+    for field in missing_fields["critical"]:
+        if field in VALIDATION_MESSAGES:
+            recommendations.append({
+                "priority": "high",
+                "field": field,
+                "message": VALIDATION_MESSAGES[field]["missing"],
+                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
+            })
+        else:
+            recommendations.append({
+                "priority": "high",
+                "field": field,
+                "message": f"Missing critical field: {field}",
+                "recommendation": f"Add {field} information to improve documentation completeness"
+            })
+
+    # Then suggest important fields (limit to top 3)
+    important_count = 0
+    for field in missing_fields["important"]:
+        if important_count >= 3:
+            break
+
+        if field in VALIDATION_MESSAGES:
+            recommendations.append({
+                "priority": "medium",
+                "field": field,
+                "message": VALIDATION_MESSAGES[field]["missing"],
+                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
+            })
+            important_count += 1
+        else:
+            recommendations.append({
+                "priority": "medium",
+                "field": field,
+                "message": f"Missing important field: {field}",
+                "recommendation": f"Consider adding {field} information to enhance documentation"
+            })
+            important_count += 1
+
+    return recommendations
+
+
 def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
     """
     Validate AI-specific requirements for an AIBOM.
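Because check_field_in_aibom looks in several places (root keys, metadata properties, component properties, model card sections, external references), a toy document helps show what counts as "present". Illustrative only; the field values below are made up, and the import path is assumed as before:

    from aibom_generator.utils import check_field_in_aibom, generate_field_recommendations

    toy_aibom = {
        "bomFormat": "CycloneDX",
        "specVersion": "1.5",
        "metadata": {
            "properties": [{"name": "spdx:suppliedBy", "value": "example-org"}]
        },
        "components": [{"name": "demo-model"}],
    }

    print(check_field_in_aibom(toy_aibom, "suppliedBy"))  # True, via the spdx: property
    print(check_field_in_aibom(toy_aibom, "buildTime"))   # False, absent everywhere

    # Fields with curated VALIDATION_MESSAGES entries get that text; unknown
    # fields fall back to the generic wording.
    recs = generate_field_recommendations(
        {"critical": ["buildTime"], "important": [], "supplementary": []}
    )
    print(recs[0]["priority"], recs[0]["recommendation"])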
 
@@ -389,17 +719,129 @@ def get_validation_summary(report: Dict[str, Any]) -> str:
     return summary


-def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True) -> Dict[str, Any]:
+def calculate_industry_neutral_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Calculate completeness score using industry best practices without explicit standard references.
+
+    Args:
+        aibom: The AIBOM to score
+
+    Returns:
+        Dictionary containing score and recommendations
+    """
+    field_checklist = {}
+    max_scores = {
+        "required_fields": 20,
+        "metadata": 20,
+        "component_basic": 20,
+        "component_model_card": 30,
+        "external_references": 10
+    }
+
+    # Track missing fields by tier
+    missing_fields = {
+        "critical": [],
+        "important": [],
+        "supplementary": []
+    }
+
+    # Score each field based on classification
+    scores_by_category = {category: 0 for category in max_scores.keys()}
+
+    for field, classification in FIELD_CLASSIFICATION.items():
+        tier = classification["tier"]
+        weight = classification["weight"]
+        category = classification["category"]
+
+        # Check if field is present
+        is_present = check_field_in_aibom(aibom, field)
+
+        if is_present:
+            scores_by_category[category] += weight
+        else:
+            missing_fields[tier].append(field)
+
+        # Add to field checklist with appropriate indicators
+        importance_indicator = "★★★" if tier == "critical" else "★★" if tier == "important" else "★"
+        field_checklist[field] = f"{'✔' if is_present else '✘'} {importance_indicator}"
+
+    # Cap category scores at max_scores
+    for category in scores_by_category:
+        scores_by_category[category] = min(scores_by_category[category], max_scores[category])
+
+    # Calculate total score (weighted sum of category completion ratios;
+    # dividing by each category's maximum keeps the result in the 0-100 range)
+    total_score = 0
+    for category in scores_by_category:
+        category_weight = max_scores[category] / sum(max_scores.values())
+        total_score += (scores_by_category[category] / max_scores[category]) * category_weight
+
+    total_score = total_score * 100  # Convert to percentage
+
+    # Determine completeness profile
+    profile = determine_completeness_profile(aibom, total_score)
+
+    # Apply penalties for missing critical fields
+    penalty_result = apply_completeness_penalties(total_score, missing_fields)
+
+    # Generate recommendations
+    recommendations = generate_field_recommendations(missing_fields)
+
+    return {
+        "total_score": penalty_result["adjusted_score"],
+        "section_scores": scores_by_category,
+        "max_scores": max_scores,
+        "field_checklist": field_checklist,
+        "missing_fields": missing_fields,
+        "completeness_profile": profile,
+        "penalty_applied": penalty_result["penalty_applied"],
+        "penalty_reason": penalty_result["penalty_reason"],
+        "recommendations": recommendations
+    }
+
+
+def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True, use_best_practices: bool = True) -> Dict[str, Any]:
     """
     Calculate completeness score for an AIBOM and optionally validate against AI requirements.
+    Enhanced with industry best practices scoring.

     Args:
         aibom: The AIBOM to score and validate
         validate: Whether to perform validation
+        use_best_practices: Whether to use enhanced industry best practices scoring

     Returns:
         Dictionary containing score and validation results
     """
+    # If using best practices scoring, use the enhanced industry-neutral approach
+    if use_best_practices:
+        result = calculate_industry_neutral_score(aibom)
+
+        # Add validation if requested
+        if validate:
+            validation_result = validate_aibom(aibom)
+            result["validation"] = validation_result
+
+            # Adjust score based on validation results
+            if not validation_result["valid"]:
+                # Count errors and warnings
+                error_count = validation_result["summary"]["error_count"]
+                warning_count = validation_result["summary"]["warning_count"]
+
+                # Apply penalties to the score
+                if error_count > 0:
+                    # Severe penalty for errors (up to 50% reduction)
+                    error_penalty = min(0.5, error_count * 0.1)
+                    result["total_score"] = round(result["total_score"] * (1 - error_penalty), 2)
+                    result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
+                elif warning_count > 0:
+                    # Minor penalty for warnings (up to 20% reduction)
+                    warning_penalty = min(0.2, warning_count * 0.05)
+                    result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 2)
+                    result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"
+
+        return result
+
+    # Otherwise, use the original scoring method
     field_checklist = {}
     max_scores = {
         "required_fields": 20,