File size: 7,768 Bytes
7e60a42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
#!/usr/bin/env python3
"""
Smart startup script for AI Dataset Studio
Automatically detects available features and chooses the best version to run
"""
import importlib
import logging
import sys
from typing import Any, Dict, List, Optional, Tuple
# Root logging setup: INFO level, each record prefixed with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger shared by the functions in this script.
logger = logging.getLogger(__name__)
def check_import(module_name: str, package_name: Optional[str] = None) -> Tuple[bool, str]:
    """Check whether a module can be imported.

    Args:
        module_name: Name handed to ``importlib.import_module``
            (e.g. ``"bs4"``).
        package_name: Distribution name that goes with the module
            (e.g. ``"beautifulsoup4"``). Accepted so callers can pass their
            ``(module, package)`` pairs straight through; the check itself
            does not use it.

    Returns:
        ``(True, "✅ <module_name>")`` when the import succeeds, otherwise
        ``(False, "❌ <module_name>: <error>")``.
    """
    try:
        importlib.import_module(module_name)
    except ImportError as e:
        return False, f"❌ {module_name}: {e}"
    except Exception as e:
        # A module may raise something other than ImportError while it
        # initialises. For a capability probe that still means "not usable",
        # so report it instead of aborting the whole diagnosis.
        return False, f"❌ {module_name}: {type(e).__name__}: {e}"
    return True, f"✅ {module_name}"
def diagnose_system() -> Dict[str, Any]:
    """Probe which essential and optional dependencies can be imported.

    Each dependency is checked with ``check_import`` and the outcome is
    logged.

    Returns:
        A dict with these keys:

        * ``essential_available`` (bool): every essential module imported.
        * ``ai_models_available`` (bool): both ``transformers`` and ``torch``
          imported.
        * ``nlp_available`` (bool): ``nltk`` imported.
        * ``datasets_available`` (bool): ``datasets`` imported.
        * ``missing_essential`` (list of str): package names of essential
          dependencies that failed to import.
        * ``missing_optional`` (list of str): package names of optional
          dependencies that failed to import.
    """
    logger.info("🔍 Diagnosing system capabilities...")

    # Each entry is (module name to import, package name to install).
    essential_deps = [
        ('gradio', 'gradio'),
        ('requests', 'requests'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
    ]
    optional_deps = [
        ('transformers', 'transformers'),
        ('torch', 'torch'),
        ('datasets', 'datasets'),
        ('nltk', 'nltk'),
        ('sentence_transformers', 'sentence-transformers'),
    ]

    results = {
        'essential_available': True,
        'ai_models_available': False,
        'nlp_available': False,
        'datasets_available': False,
        'missing_essential': [],
        'missing_optional': [],
    }

    logger.info("📋 Checking essential dependencies...")
    for module, package in essential_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if not available:
            results['essential_available'] = False
            results['missing_essential'].append(package)

    logger.info("🔍 Checking optional dependencies...")
    importable_optional = set()
    for module, package in optional_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if available:
            importable_optional.add(module)
        else:
            results['missing_optional'].append(package)

    # The feature summary tells users to "install transformers + torch", so
    # AI features are only reported as available when both are importable;
    # one without the other is not enough.
    results['ai_models_available'] = {'transformers', 'torch'} <= importable_optional
    results['nlp_available'] = 'nltk' in importable_optional
    results['datasets_available'] = 'datasets' in importable_optional

    return results
def test_gpu_availability() -> bool:
    """Report whether PyTorch can see a CUDA GPU.

    The outcome is logged: the name of device 0 when a GPU is available,
    otherwise a note that the CPU will be used.

    Returns:
        The value of ``torch.cuda.is_available()``, or ``False`` when
        PyTorch cannot be imported.
    """
    try:
        import torch

        gpu_available = torch.cuda.is_available()
        if gpu_available:
            gpu_name = torch.cuda.get_device_name(0)
            logger.info(f"🚀 GPU available: {gpu_name}")
        else:
            logger.info("💻 Using CPU (GPU not available)")
        return gpu_available
    except ImportError:
        logger.info("💻 Using CPU (PyTorch not available)")
        return False
def install_missing_packages(packages: List[str]) -> bool:
    """Attempt to install missing packages with pip.

    Runs ``<current interpreter> -m pip install <packages...>`` and captures
    its output.

    Args:
        packages: Package names to install. An empty list is a no-op.

    Returns:
        ``True`` when there was nothing to install or pip exited with
        status 0; ``False`` when pip failed or could not be run.
    """
    if not packages:
        return True

    logger.info(f"📦 Attempting to install missing packages: {', '.join(packages)}")
    try:
        import subprocess

        # Use the running interpreter so packages land in its environment.
        cmd = [sys.executable, "-m", "pip", "install", *packages]
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"❌ Installation failed: {result.stderr}")
        return False

    # The import system caches directory listings; without this, modules
    # installed while the process is running may not be found when the
    # caller re-checks imports.
    importlib.invalidate_caches()
    logger.info("✅ Packages installed successfully!")
    return True
def run_full_version() -> None:
    """Run the full-featured version.

    The ``app`` module is imported for its import-time effects; nothing else
    is done with it here.
    NOTE(review): presumably ``app`` launches the UI when imported — confirm.

    Raises:
        Exception: Whatever the import raises, re-raised after being logged
            so the caller can fall back to another version.
    """
    logger.info("🚀 Starting full AI Dataset Studio...")
    try:
        # Import and run the main app
        import app
        logger.info("✅ Full version loaded successfully")
    except Exception as e:
        logger.error(f"❌ Full version failed: {e}")
        raise
def run_minimal_version() -> None:
    """Run the minimal version.

    The ``app_minimal`` module is imported for its import-time effects;
    nothing else is done with it here.
    NOTE(review): presumably ``app_minimal`` launches the UI when imported —
    confirm.

    Raises:
        Exception: Whatever the import raises, re-raised after being logged.
    """
    logger.info("🚀 Starting minimal AI Dataset Studio...")
    try:
        # Import and run the minimal app
        import app_minimal
        logger.info("✅ Minimal version loaded successfully")
    except Exception as e:
        logger.error(f"❌ Minimal version failed: {e}")
        raise
def show_feature_summary(results: Dict[str, Any]) -> None:
    """Log a summary of which features are available.

    Args:
        results: Capability flags. The keys read here are
            ``essential_available``, ``ai_models_available``,
            ``nlp_available`` and ``datasets_available``.
    """
    logger.info("📊 Feature Summary:")

    # Core features are only listed when every essential dependency is
    # present; nothing is logged for them otherwise.
    if results['essential_available']:
        logger.info(" ✅ Core web scraping and data processing")
        logger.info(" ✅ CSV and JSON export")
        logger.info(" ✅ Quality filtering and text cleaning")

    if results['ai_models_available']:
        logger.info(" ✅ AI-powered sentiment analysis")
        logger.info(" ✅ Named entity recognition")
        logger.info(" ✅ Advanced content quality assessment")
    else:
        logger.info(" ⚠️ AI features disabled (install transformers + torch)")

    if results['nlp_available']:
        logger.info(" ✅ Advanced text processing with NLTK")
    else:
        logger.info(" ⚠️ Basic text processing only (install nltk)")

    if results['datasets_available']:
        logger.info(" ✅ HuggingFace Datasets export")
    else:
        logger.info(" ⚠️ Standard export only (install datasets)")
def main():
    """Main startup function.

    Diagnoses the environment, logs a feature summary and the GPU status,
    then starts the full version when AI models are available and the
    minimal version otherwise. If the full version raises, the minimal
    version is started instead.

    When essential dependencies are missing, the user is asked whether to
    install them automatically. The process exits with status 1 if the user
    declines, the installation fails, or the dependencies are still missing
    afterwards.
    """
    print("🚀 AI Dataset Studio - Smart Startup")
    print("=" * 50)

    results = diagnose_system()
    show_feature_summary(results)
    # Called for its log output only; the returned flag is not needed here.
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        missing = results['missing_essential']
        logger.error("❌ Essential dependencies missing!")
        logger.error("💡 Please install required packages:")
        # List what is actually missing rather than a fixed set of names.
        logger.error(f" pip install {' '.join(missing)}")

        # Offer to install automatically
        try:
            user_input = input("\n🤔 Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin to answer the prompt: treat it as "no".
            user_input = ""

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)
        if not install_missing_packages(missing):
            logger.error("❌ Automatic installation failed")
            sys.exit(1)

        logger.info("🔄 Restarting with new packages...")
        # Re-run diagnosis so the decision below reflects the new packages.
        results = diagnose_system()

    if not results['essential_available']:
        logger.error("❌ Cannot start - essential dependencies missing")
        sys.exit(1)

    # Choose version based on capabilities
    if results['ai_models_available']:
        logger.info("🎯 Running full-featured version with AI capabilities")
        try:
            run_full_version()
        except Exception as e:
            logger.error(f"❌ Full version failed, falling back to minimal: {e}")
            run_minimal_version()
    else:
        logger.info("🎯 Running minimal version (AI features not available)")
        run_minimal_version()
if __name__ == "__main__":
    # Script entry point: turn Ctrl-C into a clean exit (status 0) and any
    # other startup error into a logged failure (status 1).
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\n👋 Startup cancelled by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        logger.error("💡 Try running directly: python app_minimal.py")
        sys.exit(1)