Spaces:

MagicMeWizard
/

AI_Powered_Web_Scraper

Running

App Files Files Community

MagicMeWizard committed 5 days ago

Commit

7e60a42

verified ·

1 Parent(s): 4fc0c1e

Create startup.py

Browse files

Files changed (1) hide show

startup.py +232 -0

startup.py ADDED Viewed

	@@ -0,0 +1,232 @@

+#!/usr/bin/env python3
+"""
+Smart startup script for AI Dataset Studio
+Automatically detects available features and chooses the best version to run
+"""
+import sys
+import logging
+import importlib
+from typing import Dict, List, Tuple
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+def check_import(module_name: str, package_name: str = None) -> Tuple[bool, str]:
+    """Check if a module can be imported"""
+    try:
+        importlib.import_module(module_name)
+        return True, f"✅ {module_name}"
+    except ImportError as e:
+        return False, f"❌ {module_name}: {str(e)}"
+def diagnose_system() -> Dict[str, bool]:
+    """Diagnose system capabilities"""
+    logger.info("🔍 Diagnosing system capabilities...")
+    # Essential dependencies
+    essential_deps = [
+        ('gradio', 'gradio'),
+        ('requests', 'requests'),
+        ('bs4', 'beautifulsoup4'),
+        ('pandas', 'pandas'),
+        ('numpy', 'numpy')
+    ]
+    # Optional dependencies
+    optional_deps = [
+        ('transformers', 'transformers'),
+        ('torch', 'torch'),
+        ('datasets', 'datasets'),
+        ('nltk', 'nltk'),
+        ('sentence_transformers', 'sentence-transformers')
+    ]
+    results = {
+        'essential_available': True,
+        'ai_models_available': False,
+        'nlp_available': False,
+        'datasets_available': False,
+        'missing_essential': [],
+        'missing_optional': []
+    }
+    # Check essential dependencies
+    logger.info("📋 Checking essential dependencies...")
+    for module, package in essential_deps:
+        available, msg = check_import(module, package)
+        logger.info(f"  {msg}")
+        if not available:
+            results['essential_available'] = False
+            results['missing_essential'].append(package)
+    # Check optional dependencies
+    logger.info("📋 Checking optional dependencies...")
+    for module, package in optional_deps:
+        available, msg = check_import(module, package)
+        logger.info(f"  {msg}")
+        if not available:
+            results['missing_optional'].append(package)
+        else:
+            if module in ['transformers', 'torch']:
+                results['ai_models_available'] = True
+            elif module == 'nltk':
+                results['nlp_available'] = True
+            elif module == 'datasets':
+                results['datasets_available'] = True
+    return results
+def test_gpu_availability() -> bool:
+    """Test if GPU is available"""
+    try:
+        import torch
+        gpu_available = torch.cuda.is_available()
+        if gpu_available:
+            gpu_name = torch.cuda.get_device_name(0)
+            logger.info(f"🚀 GPU available: {gpu_name}")
+        else:
+            logger.info("💻 Using CPU (GPU not available)")
+        return gpu_available
+    except ImportError:
+        logger.info("💻 Using CPU (PyTorch not available)")
+        return False
+def install_missing_packages(packages: List[str]) -> bool:
+    """Attempt to install missing packages"""
+    if not packages:
+        return True
+    logger.info(f"📦 Attempting to install missing packages: {', '.join(packages)}")
+    try:
+        import subprocess
+        # Try to install packages
+        cmd = [sys.executable, "-m", "pip", "install"] + packages
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode == 0:
+            logger.info("✅ Packages installed successfully!")
+            return True
+        else:
+            logger.error(f"❌ Installation failed: {result.stderr}")
+            return False
+    except Exception as e:
+        logger.error(f"❌ Installation error: {e}")
+        return False
+def run_full_version():
+    """Run the full-featured version"""
+    logger.info("🚀 Starting full AI Dataset Studio...")
+    try:
+        # Import and run the main app
+        import app
+        logger.info("✅ Full version loaded successfully")
+    except Exception as e:
+        logger.error(f"❌ Full version failed: {e}")
+        raise
+def run_minimal_version():
+    """Run the minimal version"""
+    logger.info("🚀 Starting minimal AI Dataset Studio...")
+    try:
+        # Import and run the minimal app
+        import app_minimal
+        logger.info("✅ Minimal version loaded successfully")
+    except Exception as e:
+        logger.error(f"❌ Minimal version failed: {e}")
+        raise
+def show_feature_summary(results: Dict[str, bool]):
+    """Show a summary of available features"""
+    logger.info("📊 Feature Summary:")
+    if results['essential_available']:
+        logger.info("  ✅ Core web scraping and data processing")
+        logger.info("  ✅ CSV and JSON export")
+        logger.info("  ✅ Quality filtering and text cleaning")
+    if results['ai_models_available']:
+        logger.info("  ✅ AI-powered sentiment analysis")
+        logger.info("  ✅ Named entity recognition")
+        logger.info("  ✅ Advanced content quality assessment")
+    else:
+        logger.info("  ⚠️ AI features disabled (install transformers + torch)")
+    if results['nlp_available']:
+        logger.info("  ✅ Advanced text processing with NLTK")
+    else:
+        logger.info("  ⚠️ Basic text processing only (install nltk)")
+    if results['datasets_available']:
+        logger.info("  ✅ HuggingFace Datasets export")
+    else:
+        logger.info("  ⚠️ Standard export only (install datasets)")
+def main():
+    """Main startup function"""
+    print("🚀 AI Dataset Studio - Smart Startup")
+    print("=" * 50)
+    # Diagnose system
+    results = diagnose_system()
+    # Show feature summary
+    show_feature_summary(results)
+    # Check GPU
+    gpu_available = test_gpu_availability()
+    print("\n" + "=" * 50)
+    # Decide which version to run
+    if not results['essential_available']:
+        logger.error("❌ Essential dependencies missing!")
+        logger.error("💡 Please install required packages:")
+        logger.error("   pip install gradio pandas requests beautifulsoup4")
+        # Offer to install automatically
+        user_input = input("\n🤔 Try to install missing packages automatically? (y/n): ")
+        if user_input.lower() in ['y', 'yes']:
+            if install_missing_packages(results['missing_essential']):
+                logger.info("🔄 Restarting with new packages...")
+                # Re-run diagnosis
+                results = diagnose_system()
+            else:
+                logger.error("❌ Automatic installation failed")
+                sys.exit(1)
+        else:
+            sys.exit(1)
+    # Choose version based on capabilities
+    if results['essential_available']:
+        if results['ai_models_available']:
+            logger.info("🎯 Running full-featured version with AI capabilities")
+            try:
+                run_full_version()
+            except Exception as e:
+                logger.error(f"❌ Full version failed, falling back to minimal: {e}")
+                run_minimal_version()
+        else:
+            logger.info("🎯 Running minimal version (AI features not available)")
+            run_minimal_version()
+    else:
+        logger.error("❌ Cannot start - essential dependencies missing")
+        sys.exit(1)
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        logger.info("\n👋 Startup cancelled by user")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"❌ Startup failed: {e}")
+        logger.error("💡 Try running directly: python app_minimal.py")
+        sys.exit(1)