"""
Utility functions for the AIBOM Generator.
"""

import json
import logging
import os
import re
import uuid
from typing import Dict, List, Optional, Any, Union

logger = logging.getLogger(__name__)

# Lower-cased license spellings mapped to the identifier this module emits.
_LICENSE_MAPPINGS: Dict[str, str] = {
    "mit": "MIT",
    "apache": "Apache-2.0",
    "apache 2": "Apache-2.0",
    "apache 2.0": "Apache-2.0",
    "apache-2": "Apache-2.0",
    "apache-2.0": "Apache-2.0",
    "gpl": "GPL-3.0-only",
    "gpl-3": "GPL-3.0-only",
    "gpl-3.0": "GPL-3.0-only",
    "gpl3": "GPL-3.0-only",
    "gpl v3": "GPL-3.0-only",
    "gpl-2": "GPL-2.0-only",
    "gpl-2.0": "GPL-2.0-only",
    "gpl2": "GPL-2.0-only",
    "gpl v2": "GPL-2.0-only",
    "lgpl": "LGPL-3.0-only",
    "lgpl-3": "LGPL-3.0-only",
    "lgpl-3.0": "LGPL-3.0-only",
    "bsd": "BSD-3-Clause",
    "bsd-3": "BSD-3-Clause",
    "bsd-3-clause": "BSD-3-Clause",
    "bsd-2": "BSD-2-Clause",
    "bsd-2-clause": "BSD-2-Clause",
    "cc": "CC-BY-4.0",
    "cc-by": "CC-BY-4.0",
    "cc-by-4.0": "CC-BY-4.0",
    "cc-by-sa": "CC-BY-SA-4.0",
    "cc-by-sa-4.0": "CC-BY-SA-4.0",
    "cc-by-nc": "CC-BY-NC-4.0",
    "cc-by-nc-4.0": "CC-BY-NC-4.0",
    "cc0": "CC0-1.0",
    "cc0-1.0": "CC0-1.0",
    "public domain": "CC0-1.0",
    "unlicense": "Unlicense",
    "proprietary": "NONE",
    "commercial": "NONE",
}

# Everything except word characters, whitespace, "." and "-" is treated as a
# separator. "." and "-" must survive because they are part of version
# suffixes such as "gpl-2.0"; "_" is a word character but acts as a separator.
_LICENSE_SEPARATORS = re.compile(r"[^\w\s.-]|_")

# One compiled pattern per key, longest key first so that the most specific
# spelling wins ("gpl-2.0" before "gpl"). The look-arounds require the key to
# be a whole token: it may not be glued to letters, digits, "-" or a dotted
# version suffix, so "mit" does not match inside "permitted" and "cc-by" does
# not match "cc-by-2.0".
_LICENSE_PATTERNS = tuple(
    (re.compile(rf"(?<![\w.-]){re.escape(key)}(?![\w-]|\.\w)"), spdx_id)
    for key, spdx_id in sorted(
        _LICENSE_MAPPINGS.items(), key=lambda item: len(item[0]), reverse=True
    )
)


def setup_logging(level: int = logging.INFO) -> None:
    """
    Set up logging configuration.

    Args:
        level: Logging level passed to ``logging.basicConfig``
    """
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def ensure_directory(directory_path):
    """
    Ensure that a directory exists, creating it if necessary.

    Args:
        directory_path: Path of the directory to create

    Returns:
        The same ``directory_path`` that was passed in

    Raises:
        FileExistsError: If the path exists but is not a directory
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory_path, exist_ok=True)
    return directory_path


def generate_uuid() -> str:
    """Generate a random (version 4) UUID string for the AIBOM serialNumber."""
    return str(uuid.uuid4())


def normalize_license_id(license_text: Optional[str]) -> Optional[str]:
    """
    Normalize a license string to a SPDX license identifier if possible.

    The text is lower-cased, punctuation other than "." and "-" is turned
    into spaces, and whitespace is collapsed. The result is looked up
    directly in the mapping table; failing that, the longest known spelling
    that occurs as a whole token inside the text is used.

    Args:
        license_text: The license text to normalize

    Returns:
        None for empty input, the mapped identifier when a known spelling is
        recognised, or the original text unchanged if there is no match
    """
    if not license_text:
        return None

    cleaned = _LICENSE_SEPARATORS.sub(" ", license_text.lower())
    normalized = " ".join(cleaned.split())

    # Exact spelling, e.g. "Apache-2.0" or "cc0-1.0".
    direct = _LICENSE_MAPPINGS.get(normalized)
    if direct is not None:
        return direct

    # Known spelling embedded in a longer phrase, e.g. "MIT License".
    for pattern, spdx_id in _LICENSE_PATTERNS:
        if pattern.search(normalized):
            return spdx_id

    # Unknown license: hand the caller's text back untouched.
    return license_text


def _score_fields(
    container: Dict[str, Any],
    fields: List[str],
    weight: int,
    prefix: str,
    field_checklist: Dict[str, str],
) -> int:
    """Record presence of ``fields`` in the checklist and return the rounded share of ``weight`` earned."""
    present = [field for field in fields if field in container]
    for field in fields:
        field_checklist[f"{prefix}{field}"] = "✔" if field in present else "✘"
    return round((len(present) / len(fields)) * weight)


def calculate_completeness_score(aibom: Dict[str, Any]) -> Dict[str, Any]:
    """
    Calculate a completeness score for the AIBOM.

    Only the first entry of ``components`` is inspected. Sections that are
    missing, empty, or not dictionaries score zero and have all of their
    fields marked as absent.

    Args:
        aibom: The AIBOM dictionary

    Returns:
        Dictionary containing:
        - total_score: overall completeness score (0-100)
        - section_scores: points earned per section
        - field_checklist: dictionary showing presence (✔) or absence (✘) of key fields
    """
    # Maximum points per section; the weights add up to 100.
    weights = {
        "required_fields": 20,
        "metadata": 20,
        "component_basic": 20,
        "component_model_card": 30,
        "external_references": 10,
    }
    section_scores: Dict[str, int] = {}
    field_checklist: Dict[str, str] = {}

    # Required top-level fields
    section_scores["required_fields"] = _score_fields(
        aibom,
        ["bomFormat", "specVersion", "serialNumber", "version"],
        weights["required_fields"],
        "",
        field_checklist,
    )

    # Metadata fields (a missing or non-dict value counts as empty)
    metadata = aibom.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}
    section_scores["metadata"] = _score_fields(
        metadata,
        ["timestamp", "tools", "authors", "component"],
        weights["metadata"],
        "metadata.",
        field_checklist,
    )

    # Component basic info: first component only; tolerate an absent or
    # empty "components" list instead of raising IndexError.
    components = aibom.get("components")
    component = components[0] if isinstance(components, list) and components else {}
    if not isinstance(component, dict):
        component = {}
    section_scores["component_basic"] = _score_fields(
        component,
        ["type", "name", "bom-ref", "purl", "description", "licenses"],
        weights["component_basic"],
        "component.",
        field_checklist,
    )

    # Model card section of the first component
    model_card = component.get("modelCard")
    if not isinstance(model_card, dict):
        model_card = {}
    section_scores["component_model_card"] = _score_fields(
        model_card,
        ["modelParameters", "quantitativeAnalysis", "considerations"],
        weights["component_model_card"],
        "modelCard.",
        field_checklist,
    )

    # External references: all-or-nothing on a non-empty value
    ext_score = weights["external_references"] if aibom.get("externalReferences") else 0
    section_scores["external_references"] = round(ext_score)
    field_checklist["externalReferences"] = "✔" if ext_score else "✘"

    # Final total score
    total_score = round(sum(section_scores.values()))

    return {
        "total_score": total_score,
        "section_scores": section_scores,
        "field_checklist": field_checklist,
    }


def merge_metadata(primary: Dict[str, Any], secondary: Dict[str, Any]) -> Dict[str, Any]:
    """
    Merge two metadata dictionaries, giving priority to the primary dictionary.

    Values from ``primary`` that are None never override ``secondary``. When
    both sides hold a dictionary under the same key, the two are merged
    recursively with the same priority rule. Neither argument is modified,
    but values that are not merged are shared by reference with the inputs
    (the copy of ``secondary`` is shallow).

    Args:
        primary: Primary metadata dictionary
        secondary: Secondary metadata dictionary

    Returns:
        Merged metadata dictionary
    """
    result = secondary.copy()
    for key, value in primary.items():
        if value is None:
            # Keep whatever the secondary dictionary provided (if anything).
            continue
        existing = result.get(key)
        if isinstance(value, dict) and isinstance(existing, dict):
            result[key] = merge_metadata(value, existing)
        else:
            result[key] = value
    return result


def extract_model_id_parts(model_id: str) -> Dict[str, Optional[str]]:
    """
    Extract parts from a Hugging Face model ID.

    The ID is split at the first "/"; everything after it, including any
    further slashes, is kept in the name.

    Args:
        model_id: Hugging Face model ID (e.g., "google/bert-base-uncased")

    Returns:
        Dictionary with parts (owner, name); owner is None when the ID
        contains no "/"
    """
    head, separator, tail = model_id.partition("/")
    if not separator:
        return {"owner": None, "name": head}
    return {"owner": head, "name": tail}


def create_purl(model_id: str) -> str:
    """
    Create a Package URL (purl) for a Hugging Face model.

    Args:
        model_id: Hugging Face model ID

    Returns:
        Package URL string of the form "pkg:huggingface/<owner>/<name>", or
        "pkg:huggingface/<name>" when the owner is missing or empty
    """
    parts = extract_model_id_parts(model_id)
    if parts["owner"]:
        return f"pkg:huggingface/{parts['owner']}/{parts['name']}"
    return f"pkg:huggingface/{parts['name']}"