#!/usr/bin/env python3
"""
Smart startup script for AI Dataset Studio
Automatically detects available features and chooses the best version to run
"""

import importlib
import logging
import subprocess
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# (import name, pip package name) pairs. The two differ for some projects,
# e.g. the module ``bs4`` is installed via the ``beautifulsoup4`` package.
ESSENTIAL_DEPS = (
    ('gradio', 'gradio'),
    ('requests', 'requests'),
    ('bs4', 'beautifulsoup4'),
    ('pandas', 'pandas'),
    ('numpy', 'numpy'),
)

OPTIONAL_DEPS = (
    ('transformers', 'transformers'),
    ('torch', 'torch'),
    ('datasets', 'datasets'),
    ('nltk', 'nltk'),
    ('sentence_transformers', 'sentence-transformers'),
)


def check_import(module_name: str, package_name: Optional[str] = None) -> Tuple[bool, str]:
    """Check if a module can be imported.

    Args:
        module_name: Dotted name passed to ``importlib.import_module``.
        package_name: pip package that provides the module. Accepted so
            callers can pass their (module, package) pairs straight through;
            it does not influence the result.

    Returns:
        ``(available, message)`` where ``message`` is a one-line,
        human-readable status suitable for logging.
    """
    try:
        importlib.import_module(module_name)
    except ImportError as e:
        return False, f"❌ {module_name}: {e}"
    except Exception as e:
        # A present-but-broken install (missing shared library, incompatible
        # binary, ...) can raise something other than ImportError. For a
        # capability probe that still means "not usable", not "crash".
        return False, f"❌ {module_name}: {type(e).__name__}: {e}"
    return True, f"✅ {module_name}"


def diagnose_system(
    import_checker: Optional[Callable[..., Tuple[bool, str]]] = None,
) -> Dict[str, Any]:
    """Diagnose system capabilities.

    Probes every essential and optional dependency and derives feature flags
    from the outcome.

    Args:
        import_checker: Callable with the same contract as ``check_import``
            (``(module, package) -> (available, message)``). Defaults to
            ``check_import``; a substitute lets the decision logic run
            without performing real imports.

    Returns:
        A dict with boolean flags ``essential_available``,
        ``ai_models_available``, ``nlp_available`` and ``datasets_available``,
        plus the lists ``missing_essential`` and ``missing_optional`` holding
        the pip package names that could not be imported.
    """
    probe = check_import if import_checker is None else import_checker

    logger.info("🔍 Diagnosing system capabilities...")

    results: Dict[str, Any] = {
        'essential_available': True,
        'ai_models_available': False,
        'nlp_available': False,
        'datasets_available': False,
        'missing_essential': [],
        'missing_optional': [],
    }

    # Check essential dependencies
    logger.info("📋 Checking essential dependencies...")
    for module, package in ESSENTIAL_DEPS:
        available, msg = probe(module, package)
        logger.info(f" {msg}")
        if not available:
            results['essential_available'] = False
            results['missing_essential'].append(package)

    # Check optional dependencies
    logger.info("📋 Checking optional dependencies...")
    found = set()
    for module, package in OPTIONAL_DEPS:
        available, msg = probe(module, package)
        logger.info(f" {msg}")
        if available:
            found.add(module)
        else:
            results['missing_optional'].append(package)

    # The AI features need BOTH libraries (the feature summary tells the user
    # to "install transformers + torch"), so one of them alone is not enough.
    results['ai_models_available'] = {'transformers', 'torch'} <= found
    results['nlp_available'] = 'nltk' in found
    results['datasets_available'] = 'datasets' in found

    return results


def test_gpu_availability() -> bool:
    """Test if GPU is available.

    Returns:
        ``True`` when PyTorch imports and reports a CUDA device, ``False``
        otherwise. The outcome is also logged.
    """
    try:
        import torch
    except ImportError:
        logger.info("💻 Using CPU (PyTorch not available)")
        return False
    except Exception as e:
        # Broken PyTorch install: treat like "no GPU" instead of aborting
        # startup, since the minimal version can still run.
        logger.info(f"💻 Using CPU (PyTorch failed to load: {e})")
        return False

    gpu_available = torch.cuda.is_available()
    if not gpu_available:
        logger.info("💻 Using CPU (GPU not available)")
        return gpu_available

    gpu_name = torch.cuda.get_device_name(0)
    logger.info(f"🚀 GPU available: {gpu_name}")
    return gpu_available


def install_missing_packages(packages: List[str]) -> bool:
    """Attempt to install missing packages.

    Runs ``<current interpreter> -m pip install <packages...>`` so the
    packages land in the environment this script is running in.

    Args:
        packages: pip package names. An empty list is a no-op.

    Returns:
        ``True`` if there was nothing to install or pip exited with status 0,
        ``False`` if pip failed or could not be started.
    """
    if not packages:
        return True

    logger.info(f"📦 Attempting to install missing packages: {', '.join(packages)}")

    # Argument list (no shell) keeps package names from being shell-interpreted.
    cmd = [sys.executable, "-m", "pip", "install"] + packages
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        # Best effort: any failure to launch pip is reported, never raised.
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode == 0:
        logger.info("✅ Packages installed successfully!")
        return True

    logger.error(f"❌ Installation failed: {result.stderr}")
    return False


def run_full_version():
    """Run the full-featured version.

    Imports the ``app`` module; presumably that module starts the application
    as an import side effect (TODO confirm against app.py).

    Raises:
        Exception: Whatever the import raised, re-raised after logging so the
            caller can fall back to the minimal version.
    """
    logger.info("🚀 Starting full AI Dataset Studio...")

    try:
        # Import and run the main app
        import app  # noqa: F401
        logger.info("✅ Full version loaded successfully")
    except Exception as e:
        logger.error(f"❌ Full version failed: {e}")
        raise


def run_minimal_version():
    """Run the minimal version.

    Imports the ``app_minimal`` module; presumably that module starts the
    application as an import side effect (TODO confirm against app_minimal.py).

    Raises:
        Exception: Whatever the import raised, re-raised after logging.
    """
    logger.info("🚀 Starting minimal AI Dataset Studio...")

    try:
        # Import and run the minimal app
        import app_minimal  # noqa: F401
        logger.info("✅ Minimal version loaded successfully")
    except Exception as e:
        logger.error(f"❌ Minimal version failed: {e}")
        raise


def show_feature_summary(results: Dict[str, Any]):
    """Show a summary of available features.

    Args:
        results: The dict returned by ``diagnose_system``; only the four
            boolean flags are read.
    """
    logger.info("📊 Feature Summary:")

    if results['essential_available']:
        logger.info(" ✅ Core web scraping and data processing")
        logger.info(" ✅ CSV and JSON export")
        logger.info(" ✅ Quality filtering and text cleaning")

    if results['ai_models_available']:
        logger.info(" ✅ AI-powered sentiment analysis")
        logger.info(" ✅ Named entity recognition")
        logger.info(" ✅ Advanced content quality assessment")
    else:
        logger.info(" ⚠️ AI features disabled (install transformers + torch)")

    if results['nlp_available']:
        logger.info(" ✅ Advanced text processing with NLTK")
    else:
        logger.info(" ⚠️ Basic text processing only (install nltk)")

    if results['datasets_available']:
        logger.info(" ✅ HuggingFace Datasets export")
    else:
        logger.info(" ⚠️ Standard export only (install datasets)")


def main():
    """Main startup function.

    Diagnoses the environment, optionally installs missing essential
    packages, then starts the full version when the AI libraries are present
    and the minimal version otherwise. Exits with status 1 when the essential
    dependencies cannot be satisfied.
    """
    print("🚀 AI Dataset Studio - Smart Startup")
    print("=" * 50)

    # Diagnose system
    results = diagnose_system()

    # Show feature summary
    show_feature_summary(results)

    # Check GPU (informational: the result is only logged)
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        missing = results['missing_essential']
        logger.error("❌ Essential dependencies missing!")
        logger.error("💡 Please install required packages:")
        # Built from the actual diagnosis so the hint never omits a package.
        logger.error(f" pip install {' '.join(missing)}")

        # Offer to install automatically
        try:
            user_input = input("\n🤔 Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin (service, container, pipe): decline.
            user_input = 'n'

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)

        if not install_missing_packages(missing):
            logger.error("❌ Automatic installation failed")
            sys.exit(1)

        logger.info("🔄 Restarting with new packages...")
        # Packages installed after interpreter start may be invisible to the
        # import system's finder caches until they are invalidated.
        importlib.invalidate_caches()
        # Re-run diagnosis
        results = diagnose_system()

        if not results['essential_available']:
            logger.error("❌ Cannot start - essential dependencies missing")
            sys.exit(1)

    # Choose version based on capabilities
    if not results['ai_models_available']:
        logger.info("🎯 Running minimal version (AI features not available)")
        run_minimal_version()
        return

    logger.info("🎯 Running full-featured version with AI capabilities")
    try:
        run_full_version()
    except Exception as e:
        logger.error(f"❌ Full version failed, falling back to minimal: {e}")
        run_minimal_version()


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\n👋 Startup cancelled by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        logger.error("💡 Try running directly: python app_minimal.py")
        sys.exit(1)