File size: 7,768 Bytes
7e60a42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
#!/usr/bin/env python3
"""
Smart startup script for AI Dataset Studio
Automatically detects available features and chooses the best version to run
"""

import sys
import logging
import importlib
from typing import Dict, List, Tuple

# Root logging setup: INFO and above, formatted as "time - LEVEL - message".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by every function in this script.
logger = logging.getLogger(__name__)

def check_import(module_name: str, package_name: str = None) -> Tuple[bool, str]:
    """Try to import ``module_name`` and report the outcome.

    Args:
        module_name: Importable module name (e.g. ``"bs4"``).
        package_name: Name of the pip package that provides the module
            (e.g. ``"beautifulsoup4"``). Accepted so callers can pass both
            names; the check itself does not use it.

    Returns:
        ``(available, message)``: ``available`` is ``True`` when the import
        succeeded, ``message`` is a one-line status suitable for logging.
    """
    try:
        importlib.import_module(module_name)
    except Exception as e:
        # Deliberately broader than ImportError: a broken installation can
        # raise other errors while the module body runs (e.g. OSError for a
        # missing shared library, ValueError for a binary incompatibility).
        # For a capability probe, any such failure means "not usable" and
        # must not abort the rest of the diagnosis.
        return False, f"❌ {module_name}: {e}"
    return True, f"βœ… {module_name}"

def diagnose_system() -> Dict[str, bool]:
    """Probe the installed packages and summarise which features can run.

    Every dependency is checked with ``check_import`` and the per-module
    status line is logged.

    Returns:
        A dict with the boolean flags ``essential_available``,
        ``ai_models_available``, ``nlp_available`` and
        ``datasets_available``, plus ``missing_essential`` and
        ``missing_optional``: lists of pip package names whose module could
        not be imported. (Because of those two list entries the
        ``Dict[str, bool]`` annotation is only an approximation.)
    """
    logger.info("πŸ” Diagnosing system capabilities...")

    # (importable module name, pip package name)
    essential_deps = [
        ('gradio', 'gradio'),
        ('requests', 'requests'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
    ]
    optional_deps = [
        ('transformers', 'transformers'),
        ('torch', 'torch'),
        ('datasets', 'datasets'),
        ('nltk', 'nltk'),
        ('sentence_transformers', 'sentence-transformers'),
    ]

    # Check essential dependencies
    logger.info("πŸ“‹ Checking essential dependencies...")
    missing_essential = []
    for module, package in essential_deps:
        available, msg = check_import(module, package)
        logger.info(f"  {msg}")
        if not available:
            missing_essential.append(package)

    # Check optional dependencies
    logger.info("πŸ“‹ Checking optional dependencies...")
    missing_optional = []
    importable = set()
    for module, package in optional_deps:
        available, msg = check_import(module, package)
        logger.info(f"  {msg}")
        if available:
            importable.add(module)
        else:
            missing_optional.append(package)

    return {
        'essential_available': not missing_essential,
        # The AI features need BOTH packages (the feature summary tells the
        # user to "install transformers + torch"), so one of them alone must
        # not switch the flag on.
        'ai_models_available': {'transformers', 'torch'} <= importable,
        'nlp_available': 'nltk' in importable,
        'datasets_available': 'datasets' in importable,
        'missing_essential': missing_essential,
        'missing_optional': missing_optional,
    }

def test_gpu_availability() -> bool:
    """Report whether a CUDA GPU can be used through PyTorch.

    The outcome is logged. This probe is best-effort: any failure while
    loading PyTorch or querying CUDA is treated as "no GPU" instead of
    aborting startup.

    Returns:
        ``True`` when ``torch.cuda.is_available()`` reports a usable GPU,
        ``False`` otherwise (including when PyTorch is missing or broken).
    """
    try:
        import torch
    except ImportError:
        logger.info("πŸ’» Using CPU (PyTorch not available)")
        return False
    except Exception as e:
        # An installed-but-broken torch can raise non-ImportError exceptions
        # while loading its native libraries (e.g. OSError).
        logger.info(f"πŸ’» Using CPU (PyTorch failed to load: {e})")
        return False

    try:
        gpu_available = torch.cuda.is_available()
        gpu_name = torch.cuda.get_device_name(0) if gpu_available else None
    except Exception as e:
        # Querying the device can fail even after a clean import; fall back
        # to CPU rather than crash the launcher.
        logger.info(f"πŸ’» Using CPU (GPU probe failed: {e})")
        return False

    if gpu_available:
        logger.info(f"πŸš€ GPU available: {gpu_name}")
    else:
        logger.info("πŸ’» Using CPU (GPU not available)")
    return gpu_available

def install_missing_packages(packages: List[str]) -> bool:
    """Install ``packages`` with pip into the running interpreter's environment.

    Args:
        packages: pip requirement names to install. An empty list is a
            no-op that counts as success.

    Returns:
        ``True`` when there was nothing to install or pip exited with
        status 0; ``False`` when pip failed or could not be started.
        Failures are logged and never raised.
    """
    if not packages:
        return True

    # Local imports: only needed on this rarely-taken path.
    import importlib
    import subprocess

    logger.info(f"πŸ“¦ Attempting to install missing packages: {', '.join(packages)}")

    try:
        # Run pip through the current interpreter so the packages land in
        # the same environment this script is running from.
        cmd = [sys.executable, "-m", "pip", "install", *packages]
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"❌ Installation failed: {result.stderr}")
        return False

    # The import system caches directory listings; without this, modules
    # installed after interpreter start may not be found when the caller
    # re-checks its dependencies in this same process.
    importlib.invalidate_caches()
    logger.info("βœ… Packages installed successfully!")
    return True

def run_full_version():
    """Start the full application by importing the ``app`` module.

    Raises:
        Exception: whatever the import raises; it is logged first and then
            re-raised unchanged so the caller can react.
    """
    logger.info("πŸš€ Starting full AI Dataset Studio...")
    try:
        # Importing executes app's module-level code.
        # NOTE(review): presumably the UI is launched as a side effect of
        # this import; confirm app.py does not keep its launch behind a
        # ``__main__`` guard.
        import app  # noqa: F401
    except Exception as exc:
        logger.error(f"❌ Full version failed: {exc}")
        raise
    else:
        logger.info("βœ… Full version loaded successfully")

def run_minimal_version():
    """Start the reduced application by importing the ``app_minimal`` module.

    Raises:
        Exception: whatever the import raises; it is logged first and then
            re-raised unchanged.
    """
    logger.info("πŸš€ Starting minimal AI Dataset Studio...")
    try:
        # Importing executes app_minimal's module-level code.
        # NOTE(review): presumably the UI is launched as a side effect of
        # this import; confirm app_minimal.py does not keep its launch
        # behind a ``__main__`` guard.
        import app_minimal  # noqa: F401
    except Exception as exc:
        logger.error(f"❌ Minimal version failed: {exc}")
        raise
    else:
        logger.info("βœ… Minimal version loaded successfully")

def show_feature_summary(results: Dict[str, bool]):
    """Log which feature groups are enabled for the given diagnosis.

    Args:
        results: Mapping that must contain the boolean flags
            ``essential_available``, ``ai_models_available``,
            ``nlp_available`` and ``datasets_available``.
    """
    # (flag, lines logged when the flag is truthy, lines logged otherwise)
    report = (
        (
            'essential_available',
            (
                "  βœ… Core web scraping and data processing",
                "  βœ… CSV and JSON export",
                "  βœ… Quality filtering and text cleaning",
            ),
            (),
        ),
        (
            'ai_models_available',
            (
                "  βœ… AI-powered sentiment analysis",
                "  βœ… Named entity recognition",
                "  βœ… Advanced content quality assessment",
            ),
            ("  ⚠️ AI features disabled (install transformers + torch)",),
        ),
        (
            'nlp_available',
            ("  βœ… Advanced text processing with NLTK",),
            ("  ⚠️ Basic text processing only (install nltk)",),
        ),
        (
            'datasets_available',
            ("  βœ… HuggingFace Datasets export",),
            ("  ⚠️ Standard export only (install datasets)",),
        ),
    )

    logger.info("πŸ“Š Feature Summary:")
    for flag, when_enabled, when_disabled in report:
        lines = when_enabled if results[flag] else when_disabled
        for line in lines:
            logger.info(line)

def main():
    """Diagnose the environment and start the best available app version.

    Flow:
        1. Run the dependency diagnosis, log the feature summary and the
           GPU status.
        2. If essential packages are missing, list them and offer to
           install them automatically; exit with status 1 when the user
           declines, when no interactive input is available, or when the
           installation fails.
        3. Start the full version when the AI packages are available,
           falling back to the minimal version if it fails; otherwise start
           the minimal version directly.

    Raises:
        SystemExit: with status 1 when essential dependencies are (still)
            missing.
    """
    print("πŸš€ AI Dataset Studio - Smart Startup")
    print("=" * 50)

    # Diagnose system
    results = diagnose_system()

    # Show feature summary
    show_feature_summary(results)

    # Called only for its log output; the return value is not needed here.
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        missing = results['missing_essential']
        logger.error("❌ Essential dependencies missing!")
        logger.error("πŸ’‘ Please install required packages:")
        # Name exactly what the diagnosis found missing instead of a fixed
        # list that can drift from the real requirements.
        logger.error(f"   pip install {' '.join(missing)}")

        # Offer to install automatically
        try:
            user_input = input("\nπŸ€” Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin (container, CI, hosted deployment):
            # treat it as a "no" rather than as an unexpected crash.
            user_input = ""

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)

        if not install_missing_packages(missing):
            logger.error("❌ Automatic installation failed")
            sys.exit(1)

        logger.info("πŸ”„ Restarting with new packages...")
        # Re-run diagnosis so the decision below sees the new packages.
        results = diagnose_system()

    # Still missing after an installation attempt: nothing more we can do.
    if not results['essential_available']:
        logger.error("❌ Cannot start - essential dependencies missing")
        sys.exit(1)

    # Choose version based on capabilities
    if not results['ai_models_available']:
        logger.info("🎯 Running minimal version (AI features not available)")
        run_minimal_version()
        return

    logger.info("🎯 Running full-featured version with AI capabilities")
    try:
        run_full_version()
    except Exception as e:
        logger.error(f"❌ Full version failed, falling back to minimal: {e}")
        run_minimal_version()

if __name__ == "__main__":
    # Script entry point: run the launcher and translate its outcome into a
    # process exit status (0 when cancelled by the user, 1 on any failure).
    exit_status = None
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\nπŸ‘‹ Startup cancelled by user")
        exit_status = 0
    except Exception as err:
        logger.error(f"❌ Startup failed: {err}")
        logger.error("πŸ’‘ Try running directly: python app_minimal.py")
        exit_status = 1

    if exit_status is not None:
        sys.exit(exit_status)