a1c00l committed
Commit 240512e · verified · 1 Parent(s): d05fd64

Update src/aibom_generator/utils.py

Files changed (1):
  1. src/aibom_generator/utils.py +443 -1
src/aibom_generator/utils.py CHANGED
@@ -18,6 +18,109 @@ class ValidationSeverity(Enum):
     WARNING = "warning"
     INFO = "info"

+# Field classification based on documentation value (silently aligned with SPDX)
+FIELD_CLASSIFICATION = {
+    # Critical fields (silently aligned with SPDX mandatory fields)
+    "bomFormat": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "specVersion": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "serialNumber": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "version": {"tier": "critical", "weight": 3, "category": "required_fields"},
+    "buildTime": {"tier": "critical", "weight": 4, "category": "required_fields"},
+    "releaseTime": {"tier": "critical", "weight": 4, "category": "required_fields"},
+    "name": {"tier": "critical", "weight": 4, "category": "component_basic"},
+    "downloadLocation": {"tier": "critical", "weight": 4, "category": "external_references"},
+    "primaryPurpose": {"tier": "critical", "weight": 3, "category": "metadata"},
+    "suppliedBy": {"tier": "critical", "weight": 4, "category": "metadata"},
+
+    # Important fields (aligned with key SPDX optional fields)
+    "type": {"tier": "important", "weight": 2, "category": "component_basic"},
+    "purl": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "description": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "licenses": {"tier": "important", "weight": 4, "category": "component_basic"},
+    "energyConsumption": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "hyperparameter": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "limitation": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "safetyRiskAssessment": {"tier": "important", "weight": 3, "category": "component_model_card"},
+    "typeOfModel": {"tier": "important", "weight": 3, "category": "component_model_card"},
+
+    # Supplementary fields (aligned with remaining SPDX optional fields)
+    "modelExplainability": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "standardCompliance": {"tier": "supplementary", "weight": 2, "category": "metadata"},
+    "domain": {"tier": "supplementary", "weight": 2, "category": "metadata"},
+    "energyQuantity": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "energyUnit": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "informationAboutTraining": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "informationAboutApplication": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "metric": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "metricDecisionThreshold": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "modelDataPreprocessing": {"tier": "supplementary", "weight": 2, "category": "component_model_card"},
+    "autonomyType": {"tier": "supplementary", "weight": 1, "category": "metadata"},
+    "useSensitivePersonalInformation": {"tier": "supplementary", "weight": 2, "category": "component_model_card"}
+}
+
+# Completeness profiles (silently aligned with SPDX requirements)
+COMPLETENESS_PROFILES = {
+    "basic": {
+        "description": "Minimal fields required for identification",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name"],
+        "minimum_score": 40
+    },
+    "standard": {
+        "description": "Comprehensive fields for proper documentation",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                            "buildTime", "releaseTime", "downloadLocation", "primaryPurpose", "suppliedBy"],
+        "minimum_score": 70
+    },
+    "advanced": {
+        "description": "Extensive documentation for maximum transparency",
+        "required_fields": ["bomFormat", "specVersion", "serialNumber", "version", "name",
+                            "buildTime", "releaseTime", "downloadLocation", "primaryPurpose", "suppliedBy",
+                            "type", "purl", "description", "licenses", "hyperparameter", "limitation",
+                            "energyConsumption", "safetyRiskAssessment", "typeOfModel"],
+        "minimum_score": 85
+    }
+}
+
+# Validation messages framed as best practices
+VALIDATION_MESSAGES = {
+    "buildTime": {
+        "missing": "Missing critical field: buildTime - required for comprehensive version tracking",
+        "recommendation": "Add build timestamp to enable proper versioning and reproducibility"
+    },
+    "releaseTime": {
+        "missing": "Missing critical field: releaseTime - important for tracking release history",
+        "recommendation": "Add release timestamp to document when this version was released"
+    },
+    "name": {
+        "missing": "Missing critical field: name - essential for model identification",
+        "recommendation": "Add a descriptive name for the model"
+    },
+    "downloadLocation": {
+        "missing": "Missing critical field: downloadLocation - needed for artifact retrieval",
+        "recommendation": "Add information about where the model can be downloaded"
+    },
+    "primaryPurpose": {
+        "missing": "Missing critical field: primaryPurpose - important for understanding model intent",
+        "recommendation": "Add information about the primary purpose of this model"
+    },
+    "suppliedBy": {
+        "missing": "Missing critical field: suppliedBy - needed for provenance tracking",
+        "recommendation": "Add information about who supplied this model"
+    },
+    "energyConsumption": {
+        "missing": "Missing important field: energyConsumption - helpful for environmental impact assessment",
+        "recommendation": "Consider documenting energy consumption metrics for better transparency"
+    },
+    "hyperparameter": {
+        "missing": "Missing important field: hyperparameter - valuable for reproducibility",
+        "recommendation": "Document key hyperparameters used in training"
+    },
+    "limitation": {
+        "missing": "Missing important field: limitation - important for responsible use",
+        "recommendation": "Document known limitations of the model to guide appropriate usage"
+    }
+}
+

 def setup_logging(level=logging.INFO):
     logging.basicConfig(
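The tables above are plain module-level constants, so they can be inspected directly. A minimal sketch (not part of the commit; the import path is an assumption based on the src/ layout of this file):

    # Sketch only - assumes the package imports as aibom_generator.utils.
    from aibom_generator.utils import FIELD_CLASSIFICATION, COMPLETENESS_PROFILES

    # Group field names by tier to see how the weights are distributed.
    by_tier = {}
    for field, meta in FIELD_CLASSIFICATION.items():
        by_tier.setdefault(meta["tier"], []).append(field)

    for tier, fields in sorted(by_tier.items()):
        print(f"{tier}: {len(fields)} fields")

    # The profiles are cumulative: each profile's required_fields is a
    # superset of the previous profile's list.
    for name, profile in COMPLETENESS_PROFILES.items():
        print(f"{name}: min score {profile['minimum_score']}, "
              f"{len(profile['required_fields'])} required fields")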
 
@@ -103,6 +206,233 @@ def validate_spdx(license_entry):
     return license_entry in spdx_licenses


+def check_field_in_aibom(aibom: Dict[str, Any], field: str) -> bool:
+    """
+    Check if a field is present in the AIBOM.
+
+    Args:
+        aibom: The AIBOM to check
+        field: The field name to check
+
+    Returns:
+        True if the field is present, False otherwise
+    """
+    # Check in root level
+    if field in aibom:
+        return True
+
+    # Check in metadata
+    if "metadata" in aibom:
+        metadata = aibom["metadata"]
+        if field in metadata:
+            return True
+
+        # Check in metadata properties
+        if "properties" in metadata:
+            for prop in metadata["properties"]:
+                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                    return True
+
+    # Check in components
+    if "components" in aibom and aibom["components"]:
+        component = aibom["components"][0]  # Use first component
+
+        if field in component:
+            return True
+
+        # Check in component properties
+        if "properties" in component:
+            for prop in component["properties"]:
+                if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                    return True
+
+        # Check in model card
+        if "modelCard" in component:
+            model_card = component["modelCard"]
+
+            if field in model_card:
+                return True
+
+            # Check in model parameters
+            if "modelParameters" in model_card:
+                if field in model_card["modelParameters"]:
+                    return True
+
+                # Check in model parameters properties
+                if "properties" in model_card["modelParameters"]:
+                    for prop in model_card["modelParameters"]["properties"]:
+                        if prop.get("name") == f"spdx:{field}" or prop.get("name") == field:
+                            return True
+
+            # Check in considerations
+            if "considerations" in model_card:
+                if field in model_card["considerations"]:
+                    return True
+
+                # Check in specific consideration sections
+                for section in ["technicalLimitations", "ethicalConsiderations", "environmentalConsiderations"]:
+                    if section in model_card["considerations"]:
+                        if field == "limitation" and section == "technicalLimitations":
+                            return True
+                        if field == "safetyRiskAssessment" and section == "ethicalConsiderations":
+                            return True
+                        if field == "energyConsumption" and section == "environmentalConsiderations":
+                            return True
+
+    # Check in external references
+    if field == "downloadLocation" and "externalReferences" in aibom:
+        for ref in aibom["externalReferences"]:
+            if ref.get("type") == "distribution":
+                return True
+
+    return False
+
+
+def determine_completeness_profile(aibom: Dict[str, Any], score: float) -> Dict[str, Any]:
+    """
+    Determine which completeness profile the AIBOM satisfies.
+
+    Args:
+        aibom: The AIBOM to check
+        score: The calculated score
+
+    Returns:
+        Dictionary with profile information
+    """
+    satisfied_profiles = []
+
+    for profile_name, profile in COMPLETENESS_PROFILES.items():
+        # Check if all required fields are present
+        all_required_present = all(check_field_in_aibom(aibom, field) for field in profile["required_fields"])
+
+        # Check if score meets minimum
+        score_sufficient = score >= profile["minimum_score"]
+
+        if all_required_present and score_sufficient:
+            satisfied_profiles.append(profile_name)
+
+    # Return the highest satisfied profile
+    if "advanced" in satisfied_profiles:
+        return {
+            "name": "advanced",
+            "description": COMPLETENESS_PROFILES["advanced"]["description"],
+            "satisfied": True
+        }
+    elif "standard" in satisfied_profiles:
+        return {
+            "name": "standard",
+            "description": COMPLETENESS_PROFILES["standard"]["description"],
+            "satisfied": True
+        }
+    elif "basic" in satisfied_profiles:
+        return {
+            "name": "basic",
+            "description": COMPLETENESS_PROFILES["basic"]["description"],
+            "satisfied": True
+        }
+    else:
+        return {
+            "name": "incomplete",
+            "description": "Does not satisfy any completeness profile",
+            "satisfied": False
+        }
+
+
+def apply_completeness_penalties(original_score: float, missing_fields: Dict[str, List[str]]) -> Dict[str, Any]:
+    """
+    Apply penalties based on missing critical fields.
+
+    Args:
+        original_score: The original calculated score
+        missing_fields: Dictionary of missing fields by tier
+
+    Returns:
+        Dictionary with penalty information
+    """
+    # Count missing fields by tier
+    missing_critical_count = len(missing_fields["critical"])
+    missing_important_count = len(missing_fields["important"])
+
+    # Calculate penalty based on missing critical fields
+    if missing_critical_count > 3:
+        penalty_factor = 0.8  # 20% penalty
+        penalty_reason = "Multiple critical fields missing"
+    elif missing_critical_count > 0:
+        penalty_factor = 0.9  # 10% penalty
+        penalty_reason = "Some critical fields missing"
+    elif missing_important_count > 5:
+        penalty_factor = 0.95  # 5% penalty
+        penalty_reason = "Several important fields missing"
+    else:
+        # No penalty
+        penalty_factor = 1.0
+        penalty_reason = None
+
+    adjusted_score = original_score * penalty_factor
+
+    return {
+        "adjusted_score": round(adjusted_score, 2),
+        "penalty_applied": penalty_reason is not None,
+        "penalty_reason": penalty_reason,
+        "penalty_factor": penalty_factor
+    }
+
+
+def generate_field_recommendations(missing_fields: Dict[str, List[str]]) -> List[Dict[str, Any]]:
+    """
+    Generate recommendations for missing fields.
+
+    Args:
+        missing_fields: Dictionary of missing fields by tier
+
+    Returns:
+        List of recommendations
+    """
+    recommendations = []
+
+    # Prioritize critical fields
+    for field in missing_fields["critical"]:
+        if field in VALIDATION_MESSAGES:
+            recommendations.append({
+                "priority": "high",
+                "field": field,
+                "message": VALIDATION_MESSAGES[field]["missing"],
+                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
+            })
+        else:
+            recommendations.append({
+                "priority": "high",
+                "field": field,
+                "message": f"Missing critical field: {field}",
+                "recommendation": f"Add {field} information to improve documentation completeness"
+            })
+
+    # Then suggest important fields (limit to top 3)
+    important_count = 0
+    for field in missing_fields["important"]:
+        if important_count >= 3:
+            break
+
+        if field in VALIDATION_MESSAGES:
+            recommendations.append({
+                "priority": "medium",
+                "field": field,
+                "message": VALIDATION_MESSAGES[field]["missing"],
+                "recommendation": VALIDATION_MESSAGES[field]["recommendation"]
+            })
+            important_count += 1
+        else:
+            recommendations.append({
+                "priority": "medium",
+                "field": field,
+                "message": f"Missing important field: {field}",
+                "recommendation": f"Consider adding {field} information to enhance documentation"
+            })
+            important_count += 1
+
+    return recommendations
+
+
 def _validate_ai_requirements(aibom: Dict[str, Any]) -> List[Dict[str, Any]]:
     """
     Validate AI-specific requirements for an AIBOM.
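Because check_field_in_aibom looks in several places (root keys, metadata properties, component properties, model card sections, external references), a toy document helps show what counts as "present". Illustrative only; the field values below are made up, and the import path is assumed as before:

    from aibom_generator.utils import check_field_in_aibom, generate_field_recommendations

    toy_aibom = {
        "bomFormat": "CycloneDX",
        "specVersion": "1.5",
        "metadata": {
            "properties": [{"name": "spdx:suppliedBy", "value": "example-org"}]
        },
        "components": [{"name": "demo-model"}],
    }

    print(check_field_in_aibom(toy_aibom, "suppliedBy"))  # True, via the spdx: property
    print(check_field_in_aibom(toy_aibom, "buildTime"))   # False, absent everywhere

    # Fields with curated VALIDATION_MESSAGES entries get that text; unknown
    # fields fall back to the generic wording.
    recs = generate_field_recommendations(
        {"critical": ["buildTime"], "important": [], "supplementary": []}
    )
    print(recs[0]["priority"], recs[0]["recommendation"])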
 
@@ -389,17 +719,129 @@ def get_validation_summary(report: Dict[str, Any]) -> str:
     return summary


-def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True) -> Dict[str, Any]:
+def calculate_industry_neutral_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Calculate completeness score using industry best practices without explicit standard references.
+
+    Args:
+        aibom: The AIBOM to score
+
+    Returns:
+        Dictionary containing score and recommendations
+    """
+    field_checklist = {}
+    max_scores = {
+        "required_fields": 20,
+        "metadata": 20,
+        "component_basic": 20,
+        "component_model_card": 30,
+        "external_references": 10
+    }
+
+    # Track missing fields by tier
+    missing_fields = {
+        "critical": [],
+        "important": [],
+        "supplementary": []
+    }
+
+    # Score each field based on classification
+    scores_by_category = {category: 0 for category in max_scores.keys()}
+
+    for field, classification in FIELD_CLASSIFICATION.items():
+        tier = classification["tier"]
+        weight = classification["weight"]
+        category = classification["category"]
+
+        # Check if field is present
+        is_present = check_field_in_aibom(aibom, field)
+
+        if is_present:
+            scores_by_category[category] += weight
+        else:
+            missing_fields[tier].append(field)
+
+        # Add to field checklist with appropriate indicators
+        importance_indicator = "★★★" if tier == "critical" else "★★" if tier == "important" else "★"
+        field_checklist[field] = f"{'✔' if is_present else '✘'} {importance_indicator}"
+
+    # Cap category scores at max_scores
+    for category in scores_by_category:
+        scores_by_category[category] = min(scores_by_category[category], max_scores[category])
+
+    # Calculate total score (weighted sum of category completion ratios;
+    # dividing by each category's maximum keeps the result in the 0-100 range)
+    total_score = 0
+    for category in scores_by_category:
+        category_weight = max_scores[category] / sum(max_scores.values())
+        total_score += (scores_by_category[category] / max_scores[category]) * category_weight
+
+    total_score = total_score * 100  # Convert to percentage
+
+    # Determine completeness profile
+    profile = determine_completeness_profile(aibom, total_score)
+
+    # Apply penalties for missing critical fields
+    penalty_result = apply_completeness_penalties(total_score, missing_fields)
+
+    # Generate recommendations
+    recommendations = generate_field_recommendations(missing_fields)
+
+    return {
+        "total_score": penalty_result["adjusted_score"],
+        "section_scores": scores_by_category,
+        "max_scores": max_scores,
+        "field_checklist": field_checklist,
+        "missing_fields": missing_fields,
+        "completeness_profile": profile,
+        "penalty_applied": penalty_result["penalty_applied"],
+        "penalty_reason": penalty_result["penalty_reason"],
+        "recommendations": recommendations
+    }
+
+
+def calculate_completeness_score(aibom: Dict[str, Any], validate: bool = True, use_best_practices: bool = True) -> Dict[str, Any]:
     """
     Calculate completeness score for an AIBOM and optionally validate against AI requirements.
+    Enhanced with industry best practices scoring.

     Args:
         aibom: The AIBOM to score and validate
         validate: Whether to perform validation
+        use_best_practices: Whether to use enhanced industry best practices scoring

     Returns:
         Dictionary containing score and validation results
     """
+    # If using best practices scoring, use the enhanced industry-neutral approach
+    if use_best_practices:
+        result = calculate_industry_neutral_score(aibom)
+
+        # Add validation if requested
+        if validate:
+            validation_result = validate_aibom(aibom)
+            result["validation"] = validation_result
+
+            # Adjust score based on validation results
+            if not validation_result["valid"]:
+                # Count errors and warnings
+                error_count = validation_result["summary"]["error_count"]
+                warning_count = validation_result["summary"]["warning_count"]
+
+                # Apply penalties to the score
+                if error_count > 0:
+                    # Severe penalty for errors (up to 50% reduction)
+                    error_penalty = min(0.5, error_count * 0.1)
+                    result["total_score"] = round(result["total_score"] * (1 - error_penalty), 2)
+                    result["validation_penalty"] = f"-{int(error_penalty * 100)}% due to {error_count} schema errors"
+                elif warning_count > 0:
+                    # Minor penalty for warnings (up to 20% reduction)
+                    warning_penalty = min(0.2, warning_count * 0.05)
+                    result["total_score"] = round(result["total_score"] * (1 - warning_penalty), 2)
+                    result["validation_penalty"] = f"-{int(warning_penalty * 100)}% due to {warning_count} schema warnings"
+
+        return result
+
+    # Otherwise, use the original scoring method
     field_checklist = {}
     max_scores = {
         "required_fields": 20,