MagicMeWizard committed
Commit dbedabb · verified · 1 Parent(s): dcb20f6

Create test_perplexity.py

Files changed (1)
  1. test_perplexity.py +786 -0
test_perplexity.py ADDED
@@ -0,0 +1,786 @@
+ """
+ 🧪 Testing utilities for Perplexity AI integration
+ Run comprehensive tests to validate your AI Dataset Studio deployment
+ """
+
+ import os
+ import json
+ import time
+ import logging
+ from typing import Any, Dict, List, Tuple, Optional
+ from datetime import datetime
+
+ # Configure logging for tests
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
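+ # Usage note (an assumption, not part of the original file: run from the project
+ # root so that app.py, perplexity_client.py and config.py checked below are importable):
+ #     python test_perplexity.py
+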
+ def test_environment_setup() -> Dict[str, bool]:
+     """
+     🔍 Test environment setup and dependencies
+
+     Returns:
+         Dict with test results for each component
+     """
+     results = {}
+
+     print("🔍 Testing Environment Setup...")
+     print("=" * 50)
+
+     # Test 1: Check Python version
+     try:
+         import sys
+         python_version = sys.version_info
+         if python_version >= (3, 8):
+             print(f"✅ Python version: {python_version.major}.{python_version.minor}")
+             results['python_version'] = True
+         else:
+             print(f"❌ Python version too old: {python_version.major}.{python_version.minor} (need 3.8+)")
+             results['python_version'] = False
+     except Exception as e:
+         print(f"❌ Python version check failed: {e}")
+         results['python_version'] = False
+
+     # Test 2: Check required packages (by import name: beautifulsoup4 installs as bs4)
+     required_packages = [
+         ('gradio', 'Gradio'),
+         ('requests', 'Requests'),
+         ('pandas', 'Pandas'),
+         ('bs4', 'BeautifulSoup'),
+         ('transformers', 'Transformers'),
+         ('torch', 'PyTorch'),
+         ('nltk', 'NLTK')
+     ]
+
+     for package, name in required_packages:
+         try:
+             __import__(package)
+             print(f"✅ {name} imported successfully")
+             results[f'package_{package}'] = True
+         except ImportError:
+             print(f"⚠️ {name} not available (optional for some features)")
+             results[f'package_{package}'] = False
+
+     # Test 3: Check environment variables
+     env_vars = ['PERPLEXITY_API_KEY', 'HF_TOKEN']
+     for var in env_vars:
+         if os.getenv(var):
+             print(f"✅ {var} is set")
+             results[f'env_{var.lower()}'] = True
+         else:
+             status = "❌" if var == 'PERPLEXITY_API_KEY' else "⚠️"
+             required = "required" if var == 'PERPLEXITY_API_KEY' else "optional"
+             print(f"{status} {var} not set ({required})")
+             results[f'env_{var.lower()}'] = False
+
+     # Test 4: Check file structure
+     required_files = ['app.py', 'perplexity_client.py', 'config.py', 'requirements.txt']
+     for file in required_files:
+         if os.path.exists(file):
+             print(f"✅ {file} found")
+             results[f'file_{file}'] = True
+         else:
+             print(f"❌ {file} missing")
+             results[f'file_{file}'] = False
+
+     print("\n" + "=" * 50)
+     return results
+
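+ # Illustrative standalone use (a sketch, not in the original):
+ #     env = test_environment_setup()
+ #     print(f"{sum(env.values())}/{len(env)} environment checks passed")
+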
+ def test_perplexity_api() -> Dict[str, Any]:
+     """
+     🧠 Test Perplexity API connectivity and functionality
+
+     Returns:
+         Dict with API test results
+     """
+     results = {
+         'api_key_valid': False,
+         'connection_successful': False,
+         'response_quality': False,
+         'rate_limiting': False,
+         'error_handling': False
+     }
+
+     print("🧠 Testing Perplexity API...")
+     print("=" * 50)
+
+     try:
+         from perplexity_client import PerplexityClient, SearchType
+
+         # Test 1: API key validation
+         client = PerplexityClient()
+         if client._validate_api_key():
+             print("✅ API key is valid")
+             results['api_key_valid'] = True
+         else:
+             print("❌ API key validation failed")
+             return results
+
+         # Test 2: Basic connection
+         try:
+             test_results = client.discover_sources(
+                 project_description="Test query for API connectivity",
+                 search_type=SearchType.GENERAL,
+                 max_sources=5
+             )
+
+             if test_results.sources or test_results.perplexity_response:
+                 print("✅ API connection successful")
+                 results['connection_successful'] = True
+             else:
+                 print("⚠️ API connected but no results returned")
+                 results['connection_successful'] = True
+
+         except Exception as e:
+             print(f"❌ API connection failed: {e}")
+             return results
+
+         # Test 3: Response quality
+         try:
+             quality_test = client.discover_sources(
+                 project_description="Find product reviews for sentiment analysis machine learning training",
+                 search_type=SearchType.GENERAL,
+                 max_sources=10
+             )
+
+             if len(quality_test.sources) >= 3:
+                 avg_score = sum(s.relevance_score for s in quality_test.sources) / len(quality_test.sources)
+                 if avg_score >= 5.0:
+                     print(f"✅ Response quality good (avg score: {avg_score:.1f})")
+                     results['response_quality'] = True
+                 else:
+                     print(f"⚠️ Response quality moderate (avg score: {avg_score:.1f})")
+                     results['response_quality'] = True
+             else:
+                 print("⚠️ Limited response quality (few sources found)")
+
+         except Exception as e:
+             print(f"⚠️ Response quality test failed: {e}")
+
+         # Test 4: Rate limiting
+         try:
+             start_time = time.time()
+
+             # Make two quick requests
+             client.discover_sources("Test query 1", max_sources=3)
+             time.sleep(0.1)  # Small delay
+             client.discover_sources("Test query 2", max_sources=3)
+
+             elapsed = time.time() - start_time
+             if elapsed >= 1.0:  # Should be rate limited to ~1 second minimum
+                 print("✅ Rate limiting is working")
+                 results['rate_limiting'] = True
+             else:
+                 print("⚠️ Rate limiting may not be active")
+
+         except Exception as e:
+             print(f"⚠️ Rate limiting test inconclusive: {e}")
+
+         # Test 5: Error handling
+         try:
+             # Test with an invalid/empty query
+             client.discover_sources("", max_sources=1)
+             print("✅ Error handling works (handled empty query)")
+             results['error_handling'] = True
+
+         except Exception as e:
+             print(f"✅ Error handling works (caught exception: {type(e).__name__})")
+             results['error_handling'] = True
+
+     except ImportError:
+         print("❌ Cannot import perplexity_client module")
+     except Exception as e:
+         print(f"❌ Unexpected error in Perplexity tests: {e}")
+
+     print("\n" + "=" * 50)
+     return results
+
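+ # Minimal direct use of the client outside the harness (a sketch; it assumes
+ # PERPLEXITY_API_KEY is set and uses only the calls exercised above):
+ #     from perplexity_client import PerplexityClient, SearchType
+ #     client = PerplexityClient()
+ #     result = client.discover_sources(
+ #         project_description="News articles for topic classification",
+ #         search_type=SearchType.GENERAL,
+ #         max_sources=10,
+ #     )
+ #     for source in result.sources:
+ #         print(source.relevance_score, source)
+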
+ def test_ai_models() -> Dict[str, bool]:
+     """
+     🤖 Test AI model loading and functionality
+
+     Returns:
+         Dict with model test results
+     """
+     results = {}
+
+     print("🤖 Testing AI Models...")
+     print("=" * 50)
+
+     try:
+         from transformers import pipeline
+         import torch
+
+         # Check GPU availability
+         gpu_available = torch.cuda.is_available()
+         print(f"🔧 GPU available: {gpu_available}")
+         results['gpu_available'] = gpu_available
+
+         # Test sentiment analysis model
+         try:
+             sentiment_analyzer = pipeline(
+                 "sentiment-analysis",
+                 model="cardiffnlp/twitter-roberta-base-sentiment-latest",
+                 return_all_scores=True
+             )
+
+             test_text = "This is a great product!"
+             result = sentiment_analyzer(test_text)
+
+             if result and len(result[0]) > 0:
+                 print("✅ Sentiment analysis model loaded and working")
+                 results['sentiment_model'] = True
+             else:
+                 print("❌ Sentiment analysis model not working properly")
+                 results['sentiment_model'] = False
+
+         except Exception as e:
+             print(f"⚠️ Sentiment analysis model failed: {e}")
+             results['sentiment_model'] = False
+
+         # Test summarization model
+         try:
+             summarizer = pipeline(
+                 "summarization",
+                 model="facebook/bart-large-cnn",
+                 max_length=100,
+                 min_length=30
+             )
+
+             test_text = """
+             Artificial intelligence has become increasingly important in modern technology.
+             Machine learning algorithms are being used across various industries to solve
+             complex problems and improve efficiency. Natural language processing, computer
+             vision, and robotics are some of the key areas where AI is making significant
+             contributions to society and business.
+             """
+
+             result = summarizer(test_text)
+
+             if result and len(result[0]['summary_text']) > 10:
+                 print("✅ Summarization model loaded and working")
+                 results['summarization_model'] = True
+             else:
+                 print("❌ Summarization model not working properly")
+                 results['summarization_model'] = False
+
+         except Exception as e:
+             print(f"⚠️ Summarization model failed: {e}")
+             results['summarization_model'] = False
+
+         # Test NER model
+         try:
+             ner_model = pipeline(
+                 "ner",
+                 model="dbmdz/bert-large-cased-finetuned-conll03-english",
+                 aggregation_strategy="simple"
+             )
+
+             test_text = "Apple Inc. was founded by Steve Jobs in California."
+             result = ner_model(test_text)
+
+             if result and len(result) > 0:
+                 print("✅ NER model loaded and working")
+                 results['ner_model'] = True
+             else:
+                 print("❌ NER model not working properly")
+                 results['ner_model'] = False
+
+         except Exception as e:
+             print(f"⚠️ NER model failed: {e}")
+             results['ner_model'] = False
+
+     except ImportError:
+         print("❌ Transformers not available - AI models cannot be tested")
+         results = {'transformers_available': False}
+
+     print("\n" + "=" * 50)
+     return results
+
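+ # Note: the pipelines above default to CPU even when a GPU is detected. One way to
+ # pin them to the first CUDA device (transformers' `device` argument, -1 = CPU):
+ #     device = 0 if torch.cuda.is_available() else -1
+ #     sentiment_analyzer = pipeline("sentiment-analysis", device=device)
+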
+ def test_web_scraping() -> Dict[str, Any]:
+     """
+     🕷️ Test web scraping functionality
+
+     Returns:
+         Dict with scraping test results
+     """
+     results = {}
+
+     print("🕷️ Testing Web Scraping...")
+     print("=" * 50)
+
+     try:
+         import requests
+         from bs4 import BeautifulSoup
+
+         # Test URLs (public, safe for testing)
+         test_urls = [
+             "https://httpbin.org/html",
+             "https://example.com",
+             "https://httpbin.org/json"
+         ]
+
+         successful_scrapes = 0
+
+         for url in test_urls:
+             try:
+                 headers = {
+                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                 }
+
+                 response = requests.get(url, headers=headers, timeout=10)
+
+                 if response.status_code == 200:
+                     # Test HTML parsing
+                     if 'html' in url:
+                         soup = BeautifulSoup(response.content, 'html.parser')
+                         text = soup.get_text()
+                         if len(text) > 10:
+                             successful_scrapes += 1
+                             print(f"✅ Successfully scraped HTML from {url}")
+                     else:
+                         if len(response.text) > 10:
+                             successful_scrapes += 1
+                             print(f"✅ Successfully retrieved content from {url}")
+                 else:
+                     print(f"⚠️ HTTP {response.status_code} from {url}")
+
+             except Exception as e:
+                 print(f"❌ Failed to scrape {url}: {e}")
+
+         if successful_scrapes >= 2:
+             print("✅ Web scraping functionality working")
+             results['scraping_works'] = True
+         else:
+             print("❌ Web scraping has issues")
+             results['scraping_works'] = False
+
+         results['successful_scrapes'] = successful_scrapes
+         results['total_tests'] = len(test_urls)
+
+     except ImportError as e:
+         print(f"❌ Required packages not available: {e}")
+         results['scraping_works'] = False
+
+     print("\n" + "=" * 50)
+     return results
+
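+ # Optional hardening (a sketch, not part of the original): retry transient HTTP
+ # failures instead of failing a URL on the first error, via requests' adapter API:
+ #     from requests.adapters import HTTPAdapter
+ #     from urllib3.util.retry import Retry
+ #     session = requests.Session()
+ #     session.mount("https://", HTTPAdapter(max_retries=Retry(total=3, backoff_factor=1)))
+ #     response = session.get(url, headers=headers, timeout=10)
+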
+ def test_complete_workflow() -> Dict[str, Any]:
+     """
+     🔄 Test complete dataset creation workflow
+
+     Returns:
+         Dict with workflow test results
+     """
+     results = {
+         'project_creation': False,
+         'source_discovery': False,
+         'data_scraping': False,
+         'data_processing': False,
+         'data_export': False,
+         'total_time': 0
+     }
+
+     print("🔄 Testing Complete Workflow...")
+     print("=" * 50)
+
+     start_time = time.time()
+
+     try:
+         # Import the main studio class
+         from app import DatasetStudio
+
+         # Test 1: Initialize studio
+         studio = DatasetStudio()
+         print("✅ Dataset Studio initialized")
+
+         # Test 2: Create project
+         project_status = studio.create_project(
+             name="Test Project",
+             template="sentiment_analysis",
+             description="Test project for workflow validation"
+         )
+
+         if "✅" in project_status:
+             print("✅ Project creation successful")
+             results['project_creation'] = True
+         else:
+             print("❌ Project creation failed")
+             return results
+
+         # Test 3: AI source discovery (if available)
+         if studio.perplexity_client:
+             discovery_status, sources_json = studio.discover_sources_with_ai(
+                 project_description="Product reviews for sentiment analysis testing",
+                 max_sources=5,
+                 search_type="general"
+             )
+
+             if "✅" in discovery_status and sources_json != "[]":
+                 print("✅ AI source discovery successful")
+                 results['source_discovery'] = True
+
+                 # Extract URLs for scraping test
+                 test_urls = studio.extract_urls_from_sources(sources_json)
+                 if test_urls:
+                     test_urls = test_urls[:2]  # Limit to 2 for testing
+             else:
+                 print("⚠️ AI source discovery didn't find sources, using fallback")
+                 test_urls = ["https://httpbin.org/html"]
+         else:
+             print("⚠️ Perplexity not available, using test URLs")
+             test_urls = ["https://httpbin.org/html"]
+
+         # Test 4: Data scraping
+         if test_urls:
+             scrape_status, scraped_data = studio.scrape_urls('\n'.join(test_urls))
+
+             if "✅" in scrape_status:
+                 print("✅ Data scraping successful")
+                 results['data_scraping'] = True
+             else:
+                 print("❌ Data scraping failed")
+                 return results
+
+         # Test 5: Data processing
+         if studio.scraped_data:
+             process_status, processed_data = studio.process_data("sentiment_analysis")
+
+             if "✅" in process_status:
+                 print("✅ Data processing successful")
+                 results['data_processing'] = True
+             else:
+                 print("⚠️ Data processing had issues but continued")
+                 results['data_processing'] = True  # Allow partial success
+
+         # Test 6: Data export
+         if studio.processed_data:
+             export_status, file_path = studio.export_dataset("JSON")
+
+             if "✅" in export_status and file_path:
+                 print("✅ Data export successful")
+                 results['data_export'] = True
+             else:
+                 print("❌ Data export failed")
+
+     except Exception as e:
+         print(f"❌ Workflow test failed: {e}")
+         logger.exception("Workflow test error")
+
+     results['total_time'] = time.time() - start_time
+     print(f"⏱️ Total workflow time: {results['total_time']:.1f} seconds")
+
+     print("\n" + "=" * 50)
+     return results
+
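+ # Illustrative: run only the workflow test and gate on the first step (sketch):
+ #     wf = test_complete_workflow()
+ #     assert wf['project_creation'], "project creation must pass before later steps"
+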
+ def run_performance_benchmark() -> Dict[str, float]:
+     """
+     ⚡ Run performance benchmarks
+
+     Returns:
+         Dict with performance metrics
+     """
+     results = {}
+
+     print("⚡ Running Performance Benchmarks...")
+     print("=" * 50)
+
+     try:
+         # Test 1: API response time
+         if os.getenv('PERPLEXITY_API_KEY'):
+             from perplexity_client import PerplexityClient
+
+             client = PerplexityClient()
+             start_time = time.time()
+
+             client.discover_sources(
+                 "Performance test query for machine learning",
+                 max_sources=5
+             )
+
+             api_time = time.time() - start_time
+             results['api_response_time'] = api_time
+             print(f"🧠 Perplexity API response time: {api_time:.2f}s")
+
+         # Test 2: Model loading time
+         try:
+             from transformers import pipeline
+
+             start_time = time.time()
+             sentiment_analyzer = pipeline("sentiment-analysis")
+             model_load_time = time.time() - start_time
+
+             results['model_load_time'] = model_load_time
+             print(f"🤖 Model loading time: {model_load_time:.2f}s")
+
+             # Test 3: Processing speed
+             test_texts = [
+                 "This is a great product!",
+                 "I really don't like this item.",
+                 "This product is okay, nothing special.",
+                 "Amazing quality and fast delivery!",
+                 "Terrible experience, would not recommend."
+             ]
+
+             start_time = time.time()
+             for text in test_texts:
+                 sentiment_analyzer(text)
+             processing_time = time.time() - start_time
+
+             results['processing_speed'] = len(test_texts) / processing_time
+             print(f"🚀 Processing speed: {results['processing_speed']:.1f} items/second")
+
+         except ImportError:
+             print("⚠️ Cannot test model performance - transformers not available")
+
+         # Test 4: Memory usage (basic estimation)
+         # psutil is an extra dependency; if it's missing, the outer except reports it
+         import psutil
+
+         process = psutil.Process(os.getpid())
+         memory_mb = process.memory_info().rss / 1024 / 1024
+         results['memory_usage_mb'] = memory_mb
+         print(f"💾 Current memory usage: {memory_mb:.1f} MB")
+
+     except Exception as e:
+         print(f"⚠️ Performance benchmark error: {e}")
+
+     print("\n" + "=" * 50)
+     return results
+
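+ # Timing note: time.time() is wall-clock; for short intervals, time.perf_counter()
+ # is the more precise choice, e.g.:
+ #     t0 = time.perf_counter()
+ #     sentiment_analyzer("Quick timing probe")
+ #     print(f"latency: {time.perf_counter() - t0:.3f}s")
+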
+ def generate_test_report(
+     env_results: Dict,
+     api_results: Dict,
+     model_results: Dict,
+     scraping_results: Dict,
+     workflow_results: Dict,
+     performance_results: Dict
+ ) -> str:
+     """
+     📊 Generate comprehensive test report
+
+     Returns:
+         Formatted test report as string
+     """
+     report = []
+     report.append("🚀 AI Dataset Studio - Test Report")
+     report.append("=" * 60)
+     report.append(f"📅 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+     report.append("")
+
+     # Environment Summary
+     report.append("🔍 ENVIRONMENT SETUP")
+     report.append("-" * 30)
+
+     env_score = sum(1 for v in env_results.values() if v) / len(env_results) * 100
+     report.append(f"Overall Score: {env_score:.0f}%")
+
+     if env_results.get('env_perplexity_api_key'):
+         report.append("✅ Perplexity API configured")
+     else:
+         report.append("❌ Perplexity API not configured")
+
+     required_packages = ['package_gradio', 'package_requests', 'package_pandas', 'package_bs4']
+     missing_required = [p for p in required_packages if not env_results.get(p)]
+
+     if not missing_required:
+         report.append("✅ All required packages available")
+     else:
+         report.append(f"❌ Missing required packages: {missing_required}")
+
+     report.append("")
+
+     # API Summary
+     report.append("🧠 PERPLEXITY AI INTEGRATION")
+     report.append("-" * 30)
+
+     if api_results.get('api_key_valid'):
+         report.append("✅ API key valid and working")
+
+         if api_results.get('connection_successful'):
+             report.append("✅ API connection successful")
+
+         if api_results.get('response_quality'):
+             report.append("✅ Response quality good")
+
+         if api_results.get('rate_limiting'):
+             report.append("✅ Rate limiting active")
+     else:
+         report.append("❌ API integration not working")
+
+     report.append("")
+
+     # Models Summary
+     report.append("🤖 AI MODELS")
+     report.append("-" * 30)
+
+     if model_results.get('transformers_available', True):
+         working_models = sum(1 for k, v in model_results.items() if k.endswith('_model') and v)
+         total_models = sum(1 for k in model_results.keys() if k.endswith('_model'))
+
+         report.append(f"Working Models: {working_models}/{total_models}")
+
+         if model_results.get('gpu_available'):
+             report.append("✅ GPU acceleration available")
+         else:
+             report.append("⚠️ CPU-only processing")
+     else:
+         report.append("❌ AI models not available")
+
+     report.append("")
+
+     # Workflow Summary
+     report.append("🔄 COMPLETE WORKFLOW")
+     report.append("-" * 30)
+
+     workflow_steps = ['project_creation', 'source_discovery', 'data_scraping', 'data_processing', 'data_export']
+     working_steps = sum(1 for step in workflow_steps if workflow_results.get(step))
+
+     report.append(f"Working Steps: {working_steps}/{len(workflow_steps)}")
+     report.append(f"Total Time: {workflow_results.get('total_time', 0):.1f} seconds")
+
+     if working_steps >= 4:
+         report.append("✅ Workflow fully functional")
+     elif working_steps >= 2:
+         report.append("⚠️ Workflow partially functional")
+     else:
+         report.append("❌ Workflow has major issues")
+
+     report.append("")
+
+     # Performance Summary
+     report.append("⚡ PERFORMANCE METRICS")
+     report.append("-" * 30)
+
+     if 'api_response_time' in performance_results:
+         api_time = performance_results['api_response_time']
+         if api_time < 10:
+             report.append(f"✅ API response time: {api_time:.1f}s (good)")
+         elif api_time < 20:
+             report.append(f"⚠️ API response time: {api_time:.1f}s (acceptable)")
+         else:
+             report.append(f"❌ API response time: {api_time:.1f}s (slow)")
+
+     if 'processing_speed' in performance_results:
+         speed = performance_results['processing_speed']
+         if speed > 2:
+             report.append(f"✅ Processing speed: {speed:.1f} items/sec (good)")
+         elif speed > 0.5:
+             report.append(f"⚠️ Processing speed: {speed:.1f} items/sec (acceptable)")
+         else:
+             report.append(f"❌ Processing speed: {speed:.1f} items/sec (slow)")
+
+     if 'memory_usage_mb' in performance_results:
+         memory = performance_results['memory_usage_mb']
+         report.append(f"💾 Memory usage: {memory:.0f} MB")
+
+     report.append("")
+
+     # Overall Assessment
+     report.append("🎯 OVERALL ASSESSMENT")
+     report.append("-" * 30)
+
+     total_score = 0
+     max_score = 0
+
+     # Calculate scores: each of the four areas is worth 25 points. max_score grows
+     # unconditionally so that a failed area actually lowers the percentage.
+     max_score += 25
+     if env_results.get('env_perplexity_api_key') and env_results.get('package_gradio'):
+         total_score += 25
+
+     max_score += 25
+     if api_results.get('api_key_valid') and api_results.get('connection_successful'):
+         total_score += 25
+
+     max_score += 25
+     if working_steps >= 3:
+         total_score += 25
+
+     max_score += 25
+     if model_results.get('sentiment_model', False) or not model_results.get('transformers_available', True):
+         total_score += 25
+
+     overall_score = (total_score / max_score) * 100 if max_score > 0 else 0
+
+     if overall_score >= 80:
+         status = "✅ EXCELLENT - Ready for production use"
+     elif overall_score >= 60:
+         status = "⚠️ GOOD - Minor issues to address"
+     elif overall_score >= 40:
+         status = "🔧 FAIR - Several issues need fixing"
+     else:
+         status = "❌ POOR - Major setup problems"
+
+     report.append(f"Overall Score: {overall_score:.0f}%")
+     report.append(f"Status: {status}")
+
+     report.append("")
+     report.append("🔧 NEXT STEPS")
+     report.append("-" * 30)
+
+     if not env_results.get('env_perplexity_api_key'):
+         report.append("1. Set PERPLEXITY_API_KEY environment variable")
+
+     if not api_results.get('api_key_valid'):
+         report.append("2. Verify Perplexity API key is correct")
+
+     if working_steps < 3:
+         report.append("3. Check error logs for workflow issues")
+
+     if not model_results.get('gpu_available', False) and model_results.get('transformers_available', True):
+         report.append("4. Consider upgrading to GPU hardware for better performance")
+
+     if overall_score >= 80:
+         report.append("🎉 Your AI Dataset Studio is ready to create amazing datasets!")
+
+     return "\n".join(report)
+
+ def main():
+     """
+     🧪 Run complete test suite
+     """
+     print("🧪 AI Dataset Studio - Complete Test Suite")
+     print("=" * 60)
+     print("This will test all components of your deployment")
+     print("Please wait while tests are running...\n")
+
+     # Run all tests
+     env_results = test_environment_setup()
+     api_results = test_perplexity_api()
+     model_results = test_ai_models()
+     scraping_results = test_web_scraping()
+     workflow_results = test_complete_workflow()
+     performance_results = run_performance_benchmark()
+
+     # Generate report
+     report = generate_test_report(
+         env_results, api_results, model_results,
+         scraping_results, workflow_results, performance_results
+     )
+
+     # Save report
+     timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+     report_filename = f"test_report_{timestamp}.txt"
+
+     try:
+         with open(report_filename, 'w', encoding='utf-8') as f:
+             f.write(report)
+         print(f"📄 Test report saved to: {report_filename}")
+     except Exception as e:
+         print(f"⚠️ Could not save report to file: {e}")
+
+     print("\n" + "=" * 60)
+     print(report)
+     print("=" * 60)
+
+     return {
+         'environment': env_results,
+         'api': api_results,
+         'models': model_results,
+         'scraping': scraping_results,
+         'workflow': workflow_results,
+         'performance': performance_results
+     }
+
+ if __name__ == "__main__":
+     # Run the complete test suite
+     test_results = main()