MagicMeWizard committed on
Commit
7e60a42
·
verified ·
1 Parent(s): 4fc0c1e

Create startup.py

Browse files
Files changed (1) hide show
  1. startup.py +232 -0
startup.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Smart startup script for AI Dataset Studio
4
+ Automatically detects available features and chooses the best version to run
5
+ """
6
+
7
import importlib
import logging
import sys
from typing import Any, Dict, List, Optional, Tuple
11
+
12
# Configure logging: timestamped INFO-level output on the root logger,
# plus a module-level logger used by every function in this script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
18
+
19
def check_import(module_name: str, package_name: Optional[str] = None) -> Tuple[bool, str]:
    """Check whether a module can be imported.

    Args:
        module_name: Name passed to ``importlib.import_module``.
        package_name: pip distribution name for the module. Accepted so the
            dependency tables can be unpacked straight into the call; it is
            not used by the check itself.

    Returns:
        A ``(available, message)`` tuple. ``message`` is a ready-to-log
        status line naming the module and, on failure, the error text.
    """
    try:
        importlib.import_module(module_name)
    except ImportError as e:
        return False, f"❌ {module_name}: {e}"
    except Exception as e:
        # A broken install can raise something other than ImportError while
        # the module body executes; report it as unavailable instead of
        # letting the whole diagnosis crash.
        return False, f"❌ {module_name}: {type(e).__name__}: {e}"
    return True, f"✅ {module_name}"
26
+
27
def diagnose_system() -> Dict[str, Any]:
    """Probe which essential and optional dependencies can be imported.

    Each dependency is checked with ``check_import`` and its status line is
    logged.

    Returns:
        A dict with these keys:

        * ``essential_available`` (bool): every essential module imported.
        * ``ai_models_available`` (bool): both ``transformers`` and ``torch``
          imported.
        * ``nlp_available`` (bool): ``nltk`` imported.
        * ``datasets_available`` (bool): ``datasets`` imported.
        * ``missing_essential`` / ``missing_optional`` (list of str): pip
          package names of the modules that failed to import.
    """
    logger.info("🔍 Diagnosing system capabilities...")

    # Each entry is (import name, pip package name).
    essential_deps = [
        ('gradio', 'gradio'),
        ('requests', 'requests'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
    ]
    optional_deps = [
        ('transformers', 'transformers'),
        ('torch', 'torch'),
        ('datasets', 'datasets'),
        ('nltk', 'nltk'),
        ('sentence_transformers', 'sentence-transformers'),
    ]

    missing_essential: List[str] = []
    missing_optional: List[str] = []

    logger.info("📋 Checking essential dependencies...")
    for module, package in essential_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if not available:
            missing_essential.append(package)

    logger.info("📋 Checking optional dependencies...")
    importable_optional = set()
    for module, package in optional_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if available:
            importable_optional.add(module)
        else:
            missing_optional.append(package)

    return {
        'essential_available': not missing_essential,
        # The AI features need transformers AND torch; having only one of
        # them must not advertise the AI-enabled build.
        'ai_models_available': {'transformers', 'torch'} <= importable_optional,
        'nlp_available': 'nltk' in importable_optional,
        'datasets_available': 'datasets' in importable_optional,
        'missing_essential': missing_essential,
        'missing_optional': missing_optional,
    }
83
+
84
def test_gpu_availability() -> bool:
    """Report whether a CUDA GPU is usable through PyTorch.

    Logs the detected device, or the reason the CPU is used instead.

    Returns:
        True if ``torch.cuda.is_available()`` reports a GPU; False when it
        does not, when PyTorch cannot be imported, or when the probe fails.
    """
    try:
        import torch

        gpu_available = bool(torch.cuda.is_available())
        gpu_name = torch.cuda.get_device_name(0) if gpu_available else None
    except ImportError:
        logger.info("💻 Using CPU (PyTorch not available)")
        return False
    except Exception as e:
        # The probe is purely informational: a driver/CUDA error here should
        # fall back to CPU rather than abort the whole startup.
        logger.warning(f"💻 Using CPU (GPU check failed: {e})")
        return False

    if gpu_available:
        logger.info(f"🚀 GPU available: {gpu_name}")
    else:
        logger.info("💻 Using CPU (GPU not available)")
    return gpu_available
98
+
99
def install_missing_packages(packages: List[str]) -> bool:
    """Attempt to install missing packages with pip.

    Runs ``<current interpreter> -m pip install <packages...>`` and captures
    its output.

    Args:
        packages: pip package names to install. An empty list is a no-op.

    Returns:
        True if there was nothing to install or pip exited with code 0;
        False if pip failed or could not be launched.
    """
    if not packages:
        return True

    logger.info(f"📦 Attempting to install missing packages: {', '.join(packages)}")

    import subprocess

    # Argument list (no shell) so package names are never shell-interpreted.
    cmd = [sys.executable, "-m", "pip", "install", *packages]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        # Best-effort helper: any failure to launch pip is reported, not raised.
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"❌ Installation failed: {result.stderr}")
        return False

    # The import system caches directory listings; without this, modules
    # installed just now may not be found when re-checked in this process.
    importlib.invalidate_caches()
    logger.info("✅ Packages installed successfully!")
    return True
123
+
124
def run_full_version() -> None:
    """Run the full-featured version by importing the ``app`` module.

    Raises:
        Exception: whatever the import of ``app`` raises, re-raised after
            being logged so the caller can fall back.
    """
    logger.info("🚀 Starting full AI Dataset Studio...")
    try:
        # Import and run the main app
        import app  # noqa: F401
    except Exception as e:
        # logger.exception keeps the traceback, which a bare message loses.
        logger.exception(f"❌ Full version failed: {e}")
        raise
    logger.info("✅ Full version loaded successfully")
134
+
135
def run_minimal_version() -> None:
    """Run the minimal version by importing the ``app_minimal`` module.

    Raises:
        Exception: whatever the import of ``app_minimal`` raises, re-raised
            after being logged.
    """
    logger.info("🚀 Starting minimal AI Dataset Studio...")
    try:
        # Import and run the minimal app
        import app_minimal  # noqa: F401
    except Exception as e:
        # logger.exception keeps the traceback, which a bare message loses.
        logger.exception(f"❌ Minimal version failed: {e}")
        raise
    logger.info("✅ Minimal version loaded successfully")
145
+
146
def show_feature_summary(results: Dict[str, Any]):
    """Log a summary of which features are available.

    Args:
        results: Diagnosis dict; the boolean keys ``essential_available``,
            ``ai_models_available``, ``nlp_available`` and
            ``datasets_available`` are read.
    """
    logger.info("📊 Feature Summary:")

    if results['essential_available']:
        logger.info(" ✅ Core web scraping and data processing")
        logger.info(" ✅ CSV and JSON export")
        logger.info(" ✅ Quality filtering and text cleaning")

    if results['ai_models_available']:
        logger.info(" ✅ AI-powered sentiment analysis")
        logger.info(" ✅ Named entity recognition")
        logger.info(" ✅ Advanced content quality assessment")
    else:
        logger.info(" ⚠️ AI features disabled (install transformers + torch)")

    if results['nlp_available']:
        logger.info(" ✅ Advanced text processing with NLTK")
    else:
        logger.info(" ⚠️ Basic text processing only (install nltk)")

    if results['datasets_available']:
        logger.info(" ✅ HuggingFace Datasets export")
    else:
        logger.info(" ⚠️ Standard export only (install datasets)")
171
+
172
def main():
    """Diagnose the environment and start the best version that can run.

    Flow:
        1. Diagnose dependencies, log the feature summary and the GPU status.
        2. If essential packages are missing, offer to install them with pip
           and re-diagnose; exit with status 1 if the user declines, the
           install fails, or essentials are still missing afterwards.
        3. Run the full version when the AI dependencies are available,
           falling back to the minimal version if it fails; otherwise run
           the minimal version directly.
    """
    print("🚀 AI Dataset Studio - Smart Startup")
    print("=" * 50)

    # Diagnose system
    results = diagnose_system()

    # Show feature summary
    show_feature_summary(results)

    # Check GPU (called for its log output; the result is not needed here)
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        missing = results['missing_essential']
        logger.error("❌ Essential dependencies missing!")
        logger.error("💡 Please install required packages:")
        # List what is actually missing instead of a fixed package set.
        logger.error(f" pip install {' '.join(missing)}")

        # Offer to install automatically
        try:
            user_input = input("\n🤔 Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin (e.g. a hosted container): treat as "no".
            user_input = ""

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)

        if not install_missing_packages(missing):
            logger.error("❌ Automatic installation failed")
            sys.exit(1)

        logger.info("🔄 Restarting with new packages...")
        # Re-run diagnosis
        results = diagnose_system()

    # Essentials can still be missing if the install "succeeded" but a module
    # remains unimportable.
    if not results['essential_available']:
        logger.error("❌ Cannot start - essential dependencies missing")
        sys.exit(1)

    if not results['ai_models_available']:
        logger.info("🎯 Running minimal version (AI features not available)")
        run_minimal_version()
        return

    logger.info("🎯 Running full-featured version with AI capabilities")
    try:
        run_full_version()
    except Exception as e:
        logger.error(f"❌ Full version failed, falling back to minimal: {e}")
        run_minimal_version()
222
+
223
if __name__ == "__main__":
    # Script entry point: Ctrl-C is a clean exit (status 0); any other
    # failure is logged with a hint and exits with status 1.
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\n👋 Startup cancelled by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        logger.error("💡 Try running directly: python app_minimal.py")
        sys.exit(1)