File size: 6,372 Bytes
df2b222 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
"""
Package management utilities for dynamic package installation in Modal sandboxes.
This module provides functions to analyze code for imports and manage package installation.
"""
import ast
import re
from typing import Set, List
try:
    # Prefer the logger exported by the project's logging configuration module.
    from mcp_hub.logging_config import logger
except ImportError:
    # Fallback logger for testing/standalone use, i.e. whenever the
    # mcp_hub package cannot be imported.
    import logging
    logger = logging.getLogger(__name__)
# Import names that are treated as already available in the base image.
# get_packages_to_install() skips every name listed here, so nothing in this
# set is ever passed to pip.
CORE_PREINSTALLED_PACKAGES = {
    # Third-party packages expected to be baked into the image
    "numpy", "pandas", "matplotlib", "requests",
    # Interpreter, operating system and filesystem
    "os", "sys", "platform", "signal", "errno", "ctypes", "gc",
    "subprocess", "shutil", "tempfile", "pathlib", "io",
    # Dates, times and numbers
    "time", "datetime", "calendar", "timeit",
    "math", "random", "decimal", "fractions", "statistics",
    # Data structures, typing and functional helpers
    "collections", "itertools", "functools", "operator", "bisect", "heapq",
    "copy", "weakref", "array", "queue", "struct",
    "enum", "dataclasses", "types", "typing", "inspect", "contextlib",
    # Text processing and serialization
    "re", "string", "textwrap", "locale",
    "json", "csv", "pickle", "base64", "html", "xml",
    # Storage and compression
    "sqlite3", "gzip", "zipfile", "tarfile",
    # Hashing and identifiers
    "hashlib", "secrets", "uuid",
    # Concurrency
    "asyncio", "threading", "multiprocessing",
    # Networking and mail ("urllib2" is the Python 2 module name)
    "urllib", "urllib2", "http", "socketserver", "email", "mailbox",
    # Command-line parsing and diagnostics
    "argparse", "getopt", "logging", "warnings",
}
# Packages that can be installed on demand, keyed by pip distribution name.
# Each value is the top-level module name that appears in import statements.
COMMON_PACKAGES = {
    # Distribution name differs from the import name
    "scikit-learn": "sklearn",
    "beautifulsoup4": "bs4",
    "pillow": "PIL",
    "opencv-python-headless": "cv2",
    "python-dateutil": "dateutil",
    # Distribution name and import name are identical
    **{
        name: name
        for name in (
            "plotly",
            "seaborn",
            "polars",
            "lightgbm",
            "xgboost",
            "flask",
            "fastapi",
            "httpx",
            "networkx",
            "wordcloud",
            "textblob",
            "spacy",
            "nltk",
        )
    },
}

# Lookup from a detected name to the distribution to install. Import names
# come first; distribution names are then mapped to themselves so a lookup by
# either spelling resolves to the pip name.
IMPORT_TO_PACKAGE = {
    **{module: package for package, module in COMMON_PACKAGES.items()},
    **{package: package for package in COMMON_PACKAGES},
}
def extract_imports_from_code(code_str: str) -> Set[str]:
    """
    Extract the top-level module names imported by a piece of Python code.

    The code is parsed with ``ast`` and every ``import`` / ``from ... import``
    statement is visited, including those nested inside functions or classes.
    Relative imports (``from . import x``, ``from .pkg import y``) are ignored
    because they refer to modules inside the code's own package, not to
    installable distributions.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names (top-level only). If the code cannot be
        parsed, the result of the regex-based fallback is returned instead.
    """
    try:
        tree = ast.parse(code_str)
    except Exception as e:
        # Deliberately broad: this is a best-effort analysis, so any parse
        # failure degrades to the regex scan instead of propagating.
        logger.warning(f"Failed to parse code with AST, falling back to regex: {e}")
        return set(extract_imports_with_regex(code_str))

    imports: Set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # "import a.b.c" -> "a"
            imports.update(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            # level > 0 marks a relative import; module is None for
            # "from . import x". Neither names an installable package.
            if node.level or not node.module:
                continue
            imports.add(node.module.split('.')[0])
    return imports
def extract_imports_with_regex(code_str: str) -> Set[str]:
    """
    Fallback method to extract imports using regex patterns.

    Each line is stripped and inspected on its own, so indented imports are
    found too. Lines starting with ``#`` are skipped. All modules of a
    comma-separated statement such as ``import os, sys`` or
    ``import numpy as np, pandas as pd`` are reported. Relative
    ``from .x import y`` lines do not match and are ignored.

    Args:
        code_str: The Python code to analyze

    Returns:
        Set of imported module names (top-level only)
    """
    dotted_name = r'[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*'
    # "import <targets>" - the target list is split on commas below
    import_re = re.compile(r'^import\s+(.*)$')
    # "from <module> import ..."
    from_re = re.compile(r'^from\s+(' + dotted_name + r')\s+import')
    module_re = re.compile(dotted_name)

    imports: Set[str] = set()
    for raw_line in code_str.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        import_match = import_re.match(line)
        if import_match:
            # Drop a trailing comment first so commas inside it are not
            # mistaken for additional import targets.
            targets = import_match.group(1).split('#', 1)[0]
            for target in targets.split(','):
                # Prefix match: "numpy as np" -> "numpy"
                module_match = module_re.match(target.strip())
                if module_match:
                    imports.add(module_match.group(0).split('.')[0])
            continue

        from_match = from_re.match(line)
        if from_match:
            imports.add(from_match.group(1).split('.')[0])
    return imports
def get_packages_to_install(detected_imports: Set[str]) -> List[str]:
    """
    Determine which packages need to be installed based on detected imports.

    Names listed in CORE_PREINSTALLED_PACKAGES are skipped, and so are
    standard-library modules reported by the running interpreter. Known import
    names are translated to their pip distribution name via IMPORT_TO_PACKAGE;
    any other name is assumed to be its own distribution name.

    Args:
        detected_imports: Set of module names found in the code

    Returns:
        Sorted list of unique package names that need to be pip installed
    """
    # Function-scope import: this module does not import sys at file level.
    import sys

    # sys.stdlib_module_names exists on Python 3.10+. It describes the
    # interpreter running this function (assumed to match the sandbox closely
    # enough). On older interpreters the empty fallback means only the
    # hand-maintained core list is consulted.
    stdlib_modules = getattr(sys, "stdlib_module_names", frozenset())

    packages = set()
    for import_name in detected_imports:
        if import_name in CORE_PREINSTALLED_PACKAGES or import_name in stdlib_modules:
            continue
        # Unknown names fall back to "package name == import name".
        packages.add(IMPORT_TO_PACKAGE.get(import_name, import_name))

    # A set removes duplicates (two import names can map to one distribution);
    # sorting makes the result independent of set iteration order.
    return sorted(packages)
def get_warmup_import_commands() -> List[str]:
    """
    Build the statements to run while warming up a sandbox.

    Returns:
        A new list containing one import statement per core package,
        followed by a print statement that reports successful warmup
    """
    import_statements = (
        "import numpy",
        "import pandas",
        "import matplotlib.pyplot",
        "import requests",
    )
    completion_message = "print('Core packages warmed up successfully')"
    return [*import_statements, completion_message]
def create_package_install_command(packages: List[str]) -> str:
    """
    Build a single pip install command covering the given packages.

    Args:
        packages: List of package names to install

    Returns:
        "pip install" followed by the unique package names in sorted order,
        or an empty string when there is nothing to install
    """
    if packages:
        # Deduplicate first, then order alphabetically.
        ordered_names = list(set(packages))
        ordered_names.sort()
        return "pip install " + " ".join(ordered_names)
    return ""