#!/usr/bin/env python3
import configparser
import json
import re
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

import pkg_resources
import requests
@dataclass
class PackageLicense:
    """Outcome of the license check for a single package.

    Instances are created with keyword arguments, so the class must be a
    dataclass: without the decorator the annotations below do not generate
    an ``__init__`` and construction raises ``TypeError``.
    """

    # Package name as given to the checker.
    name: str
    # Version that was checked, or None when no version was supplied.
    version: Optional[str]
    # License text that was found, or None when it could not be determined.
    license_type: Optional[str]
    # True when the package passed the check.
    is_authorized: bool
    # Human-readable explanation of the verdict.
    reason: str
class LicenseChecker:
    """Check package licenses against a policy read from an INI config file.

    The config supplies three things:

    * ``[Licenses] authorized_licenses`` - one license name per line
    * ``[Licenses] unauthorized_licenses`` - one license name per line
    * ``[Authorized Packages]`` - ``package: version_spec  # comment`` entries

    License strings are looked up on PyPI and remembered in
    ``license_cache.json`` (in the current working directory). Every verdict
    is appended to ``self.package_results``.
    """

    def __init__(
        self, config_file: Path = Path("./tests/code_coverage_tests/liccheck.ini")
    ):
        """Load the policy config and any existing license cache.

        Args:
            config_file: Path to the INI policy file.

        Exits the process with status 1 when ``config_file`` does not exist.
        """
        if not config_file.exists():
            print(f"Error: Config file {config_file} not found")
            sys.exit(1)

        # allow_no_value=True: options may appear without a value, in which
        # case configparser reports their value as None.
        self.config = configparser.ConfigParser(allow_no_value=True)
        self.config.read(config_file)

        # Initialize license sets
        self.authorized_licenses = self._parse_license_list(
            "Licenses", "authorized_licenses"
        )
        self.unauthorized_licenses = self._parse_license_list(
            "Licenses", "unauthorized_licenses"
        )

        # Parse authorized packages
        self.authorized_packages = self._parse_authorized_packages()

        # Initialize cache
        self.cache_file = Path("license_cache.json")
        self.license_cache: Dict[str, str] = self._load_cache()

        # Track package results
        self.package_results: List[PackageLicense] = []

    def _load_cache(self) -> Dict[str, str]:
        """Read the license cache from disk; fall back to an empty cache.

        The cache is only an optimisation, so an unreadable or malformed
        file is reported and ignored instead of aborting the whole check.
        """
        if not self.cache_file.exists():
            return {}
        try:
            with open(self.cache_file, encoding="utf-8") as f:
                cached = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            print(f"Warning: Ignoring unreadable cache {self.cache_file}: {e}")
            return {}
        if not isinstance(cached, dict):
            print(f"Warning: Ignoring malformed cache {self.cache_file}")
            return {}
        return cached

    @staticmethod
    def _normalize_license(text: str) -> str:
        """Lower-case *text* and treat ``-`` and ``_`` as spaces."""
        return text.lower().replace("-", " ").replace("_", " ")

    def _parse_license_list(self, section: str, option: str) -> Set[str]:
        """Parse license list from config, handling comments and whitespace.

        Returns the lower-cased, stripped, non-comment lines of the option's
        value, or an empty set when the option is missing or has no value.
        """
        if not self.config.has_option(section, option):
            return set()

        # With allow_no_value=True a bare option yields None, not "".
        raw_value = self.config.get(section, option) or ""

        licenses = set()
        for line in raw_value.split("\n"):
            entry = line.strip().lower()
            if entry and not entry.startswith("#"):
                licenses.add(entry)
        return licenses

    def _parse_authorized_packages(self) -> Dict[str, Dict[str, str]]:
        """Parse authorized packages with their version specs and comments.

        Returns:
            Mapping of lower-cased package name to a dict with keys
            ``"version_spec"`` (text before the first ``#``) and
            ``"comment"`` (text after it, or ``""``).
        """
        authorized: Dict[str, Dict[str, str]] = {}
        if not self.config.has_section("Authorized Packages"):
            return authorized

        for package, spec in self.config.items("Authorized Packages"):
            if package.startswith("#"):
                continue
            # spec is None for a package listed without any value.
            version_spec, _, comment = (spec or "").partition("#")
            authorized[package.strip().lower()] = {
                "version_spec": version_spec.strip(),
                "comment": comment.strip(),
            }
        return authorized

    def get_package_license_from_pypi(
        self, package_name: str, version: str
    ) -> Optional[str]:
        """Fetch license information for a package from PyPI.

        Args:
            package_name: Project name on PyPI.
            version: Release to query; an empty string queries the project
                endpoint without a version segment.

        Returns:
            The ``info.license`` field of the JSON response, or None when the
            request or decoding fails (a warning is printed in that case).
        """
        # An empty version must not be interpolated, otherwise the URL
        # contains an empty path segment ("/pypi/<name>//json").
        if version:
            url = f"https://pypi.org/pypi/{package_name}/{version}/json"
        else:
            url = f"https://pypi.org/pypi/{package_name}/json"

        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            data = response.json()
            return data.get("info", {}).get("license")
        except Exception as e:
            # Deliberately broad: the lookup is best-effort and any failure
            # is reported to the caller as "license unknown".
            print(
                f"Warning: Failed to fetch license for {package_name} {version}: {str(e)}"
            )
            return None

    def is_license_acceptable(self, license_str: str) -> Tuple[bool, str]:
        """Check if a license is acceptable based on configured lists.

        Matching is by substring on normalised text (lower-case, ``-`` and
        ``_`` treated as spaces). Config entries are normalised the same way
        before comparison so that an entry such as ``apache-2.0`` matches the
        license string ``Apache-2.0``.

        Returns:
            ``(acceptable, reason)``.
        """
        if not license_str:
            return False, "Unknown license"

        # Normalize license string to handle common variations
        normalized_license = self._normalize_license(license_str)

        # Special case for BSD licenses. NOTE: this runs before the
        # unauthorized list is consulted. Hyphens are already spaces here,
        # so "3 clause" also covers the "3-clause" spelling.
        if "bsd" in normalized_license and any(
            variation in normalized_license
            for variation in ("3 clause", "new", "simplified")
        ):
            return True, "Matches authorized license: BSD 3-Clause"

        # Check unauthorized licenses first
        for unauth in self.unauthorized_licenses:
            if self._normalize_license(unauth) in normalized_license:
                return False, f"Matches unauthorized license: {unauth}"

        # Then check authorized licenses
        for auth in self.authorized_licenses:
            if self._normalize_license(auth) in normalized_license:
                return True, f"Matches authorized license: {auth}"

        return False, "License not in authorized list"

    def _lookup_license(
        self, package_name: str, version: Optional[str]
    ) -> Optional[str]:
        """Return the license for a package, using the cache before PyPI."""
        cache_key = f"{package_name}:{version}" if version else package_name
        if cache_key in self.license_cache:
            return self.license_cache[cache_key]

        license_type = self.get_package_license_from_pypi(package_name, version or "")
        if license_type:
            self.license_cache[cache_key] = license_type
        return license_type

    def check_package(self, package_name: str, version: Optional[str] = None) -> bool:
        """Check if a specific package version is compliant.

        Order of decisions:

        1. Package listed in the config with a comment: accepted as manually
           verified, no lookup is made.
        2. Package listed without a comment and a license was found: accepted
           regardless of what the license is.
        3. No license could be determined: rejected.
        4. Otherwise the verdict of ``is_license_acceptable`` is used.

        The result is appended to ``self.package_results`` and printed.

        Returns:
            True when the package is accepted.
        """
        pkg_info = self.authorized_packages.get(package_name.lower())

        # If there's a comment, consider it manually verified
        if pkg_info is not None and pkg_info.get("comment"):
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=pkg_info["comment"],
                    is_authorized=True,
                    reason="Manually verified in config",
                )
            )
            print(f"✅ {package_name}: Manually verified - {pkg_info['comment']}")
            return True

        # Single lookup shared by the remaining paths, so a failed fetch for
        # an authorized package is not retried a second time.
        license_type = self._lookup_license(package_name, version)

        if pkg_info is not None and license_type:
            # Package is authorized even if its license would fail the check.
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=license_type,
                    is_authorized=True,
                    reason=f"Listed in authorized packages - {license_type}",
                )
            )
            print(f"✅ {package_name}: {license_type} (Listed in authorized packages)")
            return True

        if not license_type:
            self.package_results.append(
                PackageLicense(
                    name=package_name,
                    version=version,
                    license_type=None,
                    is_authorized=False,
                    reason="Could not determine license",
                )
            )
            print(f"⚠️ Warning: Could not determine license for {package_name}")
            return False

        is_acceptable, reason = self.is_license_acceptable(license_type)
        self.package_results.append(
            PackageLicense(
                name=package_name,
                version=version,
                license_type=license_type,
                is_authorized=is_acceptable,
                reason=reason,
            )
        )
        if is_acceptable:
            print(f"✅ {package_name}: {license_type}")
        else:
            print(f"❌ {package_name}: {license_type} - {reason}")
        return is_acceptable

    def check_requirements(self, requirements_file: Path) -> bool:
        """Check all packages in a requirements file.

        Blank lines and comment lines (including indented ones) are skipped.
        For each requirement the version of its first specifier is used,
        whatever the operator is. The license cache is written back to disk
        after all packages were checked.

        Returns:
            True when every package is accepted; False when any package is
            rejected or the file cannot be read or parsed.
        """
        print(f"\nChecking licenses for packages in {requirements_file}...")
        try:
            with open(requirements_file) as f:
                requirements = [
                    pkg_resources.Requirement.parse(line)
                    for line in f
                    # Strip before testing so indented comments are skipped
                    # instead of failing the parse of the whole file.
                    if line.strip() and not line.lstrip().startswith("#")
                ]
        except Exception as e:
            print(f"Error parsing {requirements_file}: {str(e)}")
            return False

        all_compliant = True
        for req in requirements:
            try:
                version = next(iter(req.specs))[1] if req.specs else None
            except (StopIteration, IndexError, TypeError):
                version = None
            if not self.check_package(req.name, version):
                all_compliant = False

        # Save updated cache
        with open(self.cache_file, "w", encoding="utf-8") as f:
            json.dump(self.license_cache, f, indent=2)

        return all_compliant
def main():
    """Run the license check on ``./requirements.txt`` and set the exit status.

    Exit status:
        0 - every dependency was accepted.
        1 - the requirements file could not be read or parsed, or at least
            one failing package is not listed in ``[Authorized Packages]``.

    When the only failing packages are ones listed in
    ``[Authorized Packages]``, the function returns normally without calling
    ``sys.exit``.
    """
    # req_file = "../../requirements.txt" ## LOCAL TESTING
    req_file = "./requirements.txt"
    checker = LicenseChecker()

    # Check requirements
    if checker.check_requirements(Path(req_file)):
        print("\n✅ All dependencies have acceptable licenses.")
        sys.exit(0)

    # Get lists of problematic packages
    unverified = [p for p in checker.package_results if not p.license_type]
    invalid = [
        p for p in checker.package_results if p.license_type and not p.is_authorized
    ]

    if not unverified and not invalid:
        # The check failed without recording a single failing package, which
        # only happens when the requirements file itself could not be read
        # or parsed. Treat that as an error instead of exiting with 0.
        print(f"\n❌ Error: Could not check requirements in {req_file}")
        sys.exit(1)

    # Print detailed information about problematic packages
    if unverified:
        print("\n❌ Packages with unknown licenses:")
        for pkg in unverified:
            version_str = f" ({pkg.version})" if pkg.version else ""
            print(f"- {pkg.name}{version_str}")

    if invalid:
        print("\n❌ Packages with unauthorized licenses:")
        for pkg in invalid:
            version_str = f" ({pkg.version})" if pkg.version else ""
            print(f"- {pkg.name}{version_str}: {pkg.license_type}")

    # Only error if there are packages that aren't explicitly authorized
    unhandled_packages = [
        p
        for p in (unverified + invalid)
        if p.name.lower() not in checker.authorized_packages
    ]
    if unhandled_packages:
        print("\n❌ Error: Found packages that need verification:")
        for pkg in unhandled_packages:
            version_str = f" ({pkg.version})" if pkg.version else ""
            license_str = (
                f" - {pkg.license_type}" if pkg.license_type else " - Unknown license"
            )
            print(f"- {pkg.name}{version_str}{license_str}")
        print(
            "\nAdd these packages to the [Authorized Packages] section in liccheck.ini with a comment about their license verification."
        )
        print("Example:")
        print("package-name: >=1.0.0 # MIT license manually verified")
        sys.exit(1)
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()