Spaces:

aetheris-ai
/

aibom-generator

Running

App Files Files

a1c00l committed 26 days ago

Commit

153042c

verified ·

1 Parent(s): 9034abb

Update src/aibom_generator/utils.py

Browse files

Files changed (1) hide show

src/aibom_generator/utils.py +415 -5

src/aibom_generator/utils.py CHANGED Viewed

@@ -1,16 +1,27 @@
 """
-Utility functions for the AIBOM Generator with restored field_checklist support.
 """
 import json
 import logging
 import os
 import re
 import uuid
-from typing import Dict, List, Optional, Any, Union
 logger = logging.getLogger(__name__)
 def setup_logging(level=logging.INFO):
     logging.basicConfig(
@@ -96,7 +107,289 @@ def validate_spdx(license_entry):
     return license_entry in spdx_licenses
-def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
     field_checklist = {}
     max_scores = {
         "required_fields": 20,
@@ -185,7 +478,7 @@ def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
         (ext_score * 0.10)
     )
-    return {
         "total_score": round(total_score, 2),
         "section_scores": {
             "required_fields": required_score,
@@ -197,6 +490,123 @@ def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
         "max_scores": max_scores,
         "field_checklist": field_checklist
     }
 def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]:

 """
+Utility functions for the AIBOM Generator.
 """
 import json
 import logging
 import os
 import re
 import uuid
+import requests
+import jsonschema
+from typing import Dict, List, Optional, Any, Union, Tuple
+from enum import Enum
 logger = logging.getLogger(__name__)
+# URL of the CycloneDX 1.6 JSON schema. NOTE(review): it points at the `master` branch rather than a tagged release, so the served content may change upstream - consider pinning.
+CYCLONEDX_SCHEMA_URL = "https://raw.githubusercontent.com/CycloneDX/specification/master/schema/bom-1.6.schema.json"
+# Validation severity levels; each member's value is the plain string stored under the "severity" key of a validation issue.
+class ValidationSeverity(Enum):
+    ERROR = "error"
+    WARNING = "warning"
+    INFO = "info"
 def setup_logging(level=logging.INFO):
     logging.basicConfig(
     return license_entry in spdx_licenses
def _load_cyclonedx_schema():
    """
    Load the CycloneDX JSON schema from CYCLONEDX_SCHEMA_URL.

    The download is bounded by a timeout so that an unresponsive host cannot
    block the caller indefinitely. Any failure (network error, HTTP error
    status, invalid JSON) is logged and answered with a minimal fallback
    schema instead of being raised.

    Returns:
        dict: The downloaded schema, or a minimal schema that only requires
        the top-level BOM fields if loading fails
    """
    try:
        # (connect, read) timeouts in seconds; without a timeout requests
        # waits forever on a stalled connection.
        response = requests.get(CYCLONEDX_SCHEMA_URL, timeout=(5, 30))
        response.raise_for_status()
        schema = response.json()
        logger.info(f"Successfully loaded CycloneDX schema from {CYCLONEDX_SCHEMA_URL}")
        return schema
    except Exception as e:
        # Deliberately broad: schema loading is best-effort and validation
        # must still be able to run with the fallback below.
        logger.error(f"Failed to load CycloneDX schema: {e}")
        # Fallback to a minimal schema validation
        return {
            "type": "object",
            "required": ["bomFormat", "specVersion", "serialNumber", "version"]
        }
def _validate_schema(aibom: Dict[str, Any], schema: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Validate an AIBOM against the CycloneDX JSON schema.

    Args:
        aibom: The AIBOM to validate
        schema: The CycloneDX schema; a falsy value means no schema is available
    Returns:
        List of validation issues. Each issue is a dict with the keys
        "severity", "code", "message" and "path"; the list is empty when the
        document validates. At most one schema issue is reported, because
        jsonschema.validate raises on a single error.
    """
    if not schema:
        return [{
            "severity": ValidationSeverity.WARNING.value,
            "code": "SCHEMA_UNAVAILABLE",
            "message": "CycloneDX schema unavailable, performing minimal validation",
            "path": "$"
        }]

    issues = []
    try:
        jsonschema.validate(instance=aibom, schema=schema)
    except jsonschema.exceptions.ValidationError as e:
        # Render the error location in the same "$.a.b[0]" form used by the
        # other validation issues: integer parts are array indexes, anything
        # else is an object key. An empty path yields "$" (document root).
        path = "$"
        for part in e.absolute_path:
            path += f"[{part}]" if isinstance(part, int) else f".{part}"
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "SCHEMA_VALIDATION_ERROR",
            "message": str(e),
            "path": path
        })
    except Exception as e:
        # E.g. an invalid schema; reported as an issue rather than raised.
        issues.append({
            "severity": ValidationSeverity.ERROR.value,
            "code": "SCHEMA_VALIDATION_EXCEPTION",
            "message": f"Unexpected error during schema validation: {str(e)}",
            "path": "$"
        })
    return issues
def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Validate AI-specific requirements for an AIBOM.

    The checks are defensive about malformed input: values of the wrong JSON
    type (for example a null "metadata", a non-string "purl" or a component
    that is not an object) are reported as issues instead of raising.

    Args:
        aibom: The AIBOM to validate
    Returns:
        List of validation issues. Each issue is a dict with the keys
        "severity", "code", "message" and "path". Author issues come first,
        then per-component issues in document order, then metadata issues.
        If there are no components, only MISSING_COMPONENTS is returned.
    """
    issues: List[Dict[str, Any]] = []
    error = ValidationSeverity.ERROR
    warning = ValidationSeverity.WARNING
    info = ValidationSeverity.INFO

    def report(severity, code, message, path):
        # Single place that defines the shape of an issue record.
        issues.append({
            "severity": severity.value,
            "code": code,
            "message": message,
            "path": path
        })

    document = aibom if isinstance(aibom, dict) else {}

    # Check basic structure
    components = document.get("components")
    if not isinstance(components, list) or not components:
        report(error, "MISSING_COMPONENTS",
               "AIBOM must contain at least one component", "$.components")
        return issues  # Can't continue validation without components

    # A metadata value that is not an object is treated as absent.
    metadata = document.get("metadata")
    if not isinstance(metadata, dict):
        metadata = None

    # Check for schema compliance issues with authors
    authors = metadata.get("authors") if metadata is not None else None
    if isinstance(authors, list):
        for i, author in enumerate(authors):
            if isinstance(author, dict) and "url" in author:
                report(error, "INVALID_AUTHOR_PROPERTY",
                       "Author object contains 'url' property which is not allowed in CycloneDX schema. Use 'email' instead.",
                       f"$.metadata.authors[{i}].url")

    # Validate each component
    for i, component in enumerate(components):
        component_path = f"$.components[{i}]"
        if not isinstance(component, dict):
            # None of the field checks below make sense on a non-object.
            report(error, "INVALID_COMPONENT",
                   "Component must be a JSON object", component_path)
            continue

        # Check component type
        if "type" not in component:
            report(error, "MISSING_COMPONENT_TYPE",
                   "Component must have a type", f"{component_path}.type")
        elif component["type"] != "machine-learning-model":
            report(warning, "INVALID_COMPONENT_TYPE",
                   "Component type should be 'machine-learning-model' for AI components",
                   f"{component_path}.type")

        # Check PURL format
        if "purl" not in component:
            report(error, "MISSING_PURL",
                   "Component must have a PURL", f"{component_path}.purl")
        else:
            purl = component["purl"]
            # A non-string PURL cannot start with 'pkg:' either.
            if not isinstance(purl, str) or not purl.startswith("pkg:"):
                report(error, "INVALID_PURL_FORMAT",
                       "PURL must start with 'pkg:'", f"{component_path}.purl")
            elif "huggingface" in purl and "@" not in purl and "version" in component:
                report(warning, "MISSING_VERSION_IN_PURL",
                       "PURL should include version information with '@' for versioned components",
                       f"{component_path}.purl")

        # Check model card; a value that is not an object counts as missing
        model_card = component.get("modelCard")
        model_card_path = f"{component_path}.modelCard"
        if not isinstance(model_card, dict):
            report(warning, "MISSING_MODEL_CARD",
                   "AI component should include a model card", model_card_path)
        else:
            # Check model parameters
            if "modelParameters" not in model_card:
                report(warning, "MISSING_MODEL_PARAMETERS",
                       "Model card should include model parameters",
                       f"{model_card_path}.modelParameters")
            # Check considerations
            if "considerations" not in model_card:
                report(info, "MISSING_CONSIDERATIONS",
                       "Model card should include considerations section for ethical considerations, limitations, etc.",
                       f"{model_card_path}.considerations")

    # Validate metadata
    if metadata is None:
        report(error, "MISSING_METADATA",
               "AIBOM must contain metadata", "$.metadata")
    else:
        metadata_path = "$.metadata"
        # Each of these sections is flagged when absent or empty.
        section_checks = (
            ("tools", warning, "MISSING_TOOLS",
             "Metadata should include tools that generated the AIBOM"),
            ("authors", info, "MISSING_AUTHORS",
             "Metadata should include authors information"),
            ("properties", info, "MISSING_PROPERTIES",
             "Metadata should include properties for additional information"),
        )
        for key, severity, code, message in section_checks:
            if not metadata.get(key):
                report(severity, code, message, f"{metadata_path}.{key}")

    return issues
def _generate_validation_recommendations(issues: List[Dict[str, Any]]) -> List[str]:
    """
    Generate recommendations based on validation issues.

    Every recommendation appears at most once, in a fixed order that does not
    depend on the order of the issues. Issues without a "code" key, or with a
    code no rule covers, are ignored.

    Args:
        issues: List of validation issues (dicts carrying a "code" key)
    Returns:
        List of recommendations
    """
    # (issue codes that trigger the recommendation, recommendation text),
    # listed in output order.
    rules = (
        (("MISSING_COMPONENTS",),
         "Add at least one component to the AIBOM"),
        (("MISSING_COMPONENT_TYPE", "INVALID_COMPONENT_TYPE"),
         "Ensure all AI components have type 'machine-learning-model'"),
        (("MISSING_PURL", "INVALID_PURL_FORMAT"),
         "Ensure all components have a valid PURL starting with 'pkg:'"),
        (("MISSING_VERSION_IN_PURL",),
         "Include version information in PURLs using '@' syntax (e.g., pkg:huggingface/org/model@version)"),
        (("MISSING_MODEL_CARD",),
         "Add a model card section to AI components"),
        (("MISSING_MODEL_PARAMETERS",),
         "Include model parameters in the model card section"),
        (("MISSING_CONSIDERATIONS",),
         "Add ethical considerations, limitations, and risks to the model card"),
        (("MISSING_METADATA",),
         "Add metadata section to the AIBOM"),
        (("MISSING_TOOLS",),
         "Include tools information in the metadata section"),
        (("MISSING_AUTHORS",),
         "Add authors information to the metadata section"),
        (("MISSING_PROPERTIES",),
         "Include additional properties in the metadata section"),
        (("INVALID_AUTHOR_PROPERTY",),
         "Remove 'url' property from author objects and use 'email' instead to comply with CycloneDX schema"),
        # Schema failures previously produced no recommendation at all.
        (("SCHEMA_VALIDATION_ERROR", "SCHEMA_VALIDATION_EXCEPTION"),
         "Fix the reported schema violations so the AIBOM conforms to the CycloneDX schema"),
    )
    # Collapse the issues to the set of distinct codes present.
    issue_codes = {issue.get("code") for issue in issues}
    return [text for codes, text in rules if issue_codes.intersection(codes)]
+def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True) -> Dict[str, Any]:
+    """
+    Calculate completeness score for an AIBOM and optionally validate against CycloneDX schema.
+    Args:
+        aibom: The AIBOM to score and validate
+        validate: Whether to perform validation against CycloneDX schema
+    Returns:
+        Dictionary containing score and validation results
+    """
     field_checklist = {}
     max_scores = {
         "required_fields": 20,
         (ext_score * 0.10)
     )
+    result = {
         "total_score": round(total_score, 2),
         "section_scores": {
             "required_fields": required_score,
         "max_scores": max_scores,
         "field_checklist": field_checklist
     }
+    # Add validation if requested
+    if validate:
+        validation_result = validate_aibom(aibom)
+        result["validation"] = validation_result
+        # Adjust score based on validation results
+        if not validation_result["valid"]:
+            # Count errors and warnings
+            error_count = validation_result["summary"]["error_count"]
+            warning_count = validation_result["summary"]["warning_count"]
+            # Apply penalties to the score
+            if error_count > 0:
+                # Severe penalty for errors (up to 50% reduction)
+                error_penalty = min(0.5, error_count * 0.1)
+                result["total_score"] = round(result["total_score"] * (1 - error_penalty), 2)
+                result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
+            elif warning_count > 0:
+                # Minor penalty for warnings (up to 20% reduction)
+                warning_penalty = min(0.2, warning_count * 0.05)
+                result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 2)
+                result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"
+    return result
def validate_aibom(aibom: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate an AIBOM against the CycloneDX schema and AI-specific requirements.

    Schema validation runs first and the AI-specific checks second; their
    issues are concatenated in that order. A check counts as passed only when
    it produced no issues at all, whatever their severity.

    Args:
        aibom: The AIBOM to validate
    Returns:
        Validation report with the keys "valid", "schema_valid", "ai_valid",
        "issues", "recommendations" and "summary" (per-severity counts)
    """
    # Fetch the schema, then run both validators in a fixed order.
    schema_findings = _validate_schema(aibom, _load_cyclonedx_schema())
    ai_findings = _validate_ai_requirements(aibom)

    findings = []
    if schema_findings:
        findings.extend(schema_findings)
    if ai_findings:
        findings.extend(ai_findings)

    schema_ok = not schema_findings
    ai_ok = not ai_findings

    def count(level):
        # Number of collected issues carrying the given severity.
        return sum(1 for finding in findings if finding["severity"] == level.value)

    return {
        "valid": schema_ok and ai_ok,
        "schema_valid": schema_ok,
        "ai_valid": ai_ok,
        "issues": findings,
        "recommendations": _generate_validation_recommendations(findings),
        "summary": {
            "error_count": count(ValidationSeverity.ERROR),
            "warning_count": count(ValidationSeverity.WARNING),
            "info_count": count(ValidationSeverity.INFO)
        }
    }
def get_validation_summary(report: Dict[str, Any]) -> str:
    """
    Get a human-readable summary of the validation report.

    The summary always contains the verdict line and the per-severity counts.
    For an invalid report it also lists every issue, followed by the numbered
    recommendations; the "Recommendations:" section is left out when there
    are none, so no empty heading is printed.

    Args:
        report: Validation report with the keys "valid", "summary", "issues"
            and "recommendations"
    Returns:
        Human-readable summary, terminated by a newline
    """
    is_valid = report["valid"]
    counts = report["summary"]

    lines = [
        "✅ AIBOM is valid and complies with CycloneDX schema and AI requirements."
        if is_valid
        else "❌ AIBOM validation failed.",
        "",
        "Summary:",
        f"- Errors: {counts['error_count']}",
        f"- Warnings: {counts['warning_count']}",
        f"- Info: {counts['info_count']}",
    ]

    if not is_valid:
        lines += ["", "Issues:"]
        lines.extend(
            f"- [{issue['severity'].upper()}] {issue['code']}: "
            f"{issue['message']} (at {issue['path']})"
            for issue in report["issues"]
        )
        recommendations = report["recommendations"]
        if recommendations:
            lines += ["", "Recommendations:"]
            lines.extend(
                f"{number}. {text}"
                for number, text in enumerate(recommendations, 1)
            )

    # Build the text once instead of growing a string piece by piece.
    return "\n".join(lines) + "\n"
 def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]: