latterworks committed
Commit 717a9f0 · verified · 1 Parent(s): e397d59

Update app.py

Files changed (1): app.py +492 -685
app.py CHANGED
@@ -1,72 +1,157 @@
  import os
- import sys
- import time
  import logging
- import datasets
  import shodan
- import asyncio
  import aiohttp
- import json
- import gradio as gr
- from typing import List, Dict, Any, Optional, Tuple, Set, Union
- from concurrent.futures import ThreadPoolExecutor

- # Configure production-grade logging
  logging.basicConfig(
      level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] - %(message)s',
-     handlers=[
-         logging.StreamHandler(),
-         logging.FileHandler("ollama_scanner.log")
-     ]
  )
  logger = logging.getLogger(__name__)
- def load_or_create_dataset():
      """
-     Load dataset from HuggingFace with optimized error handling and authentication.

      Returns:
-         Dataset: The loaded dataset object ready for query operations

      Raises:
-         ValueError: When authentication fails or dataset structure is invalid
-         ConnectionError: When network issues prevent dataset access
      """
-     # HF token must exist for private dataset access
-     hf_token = os.getenv("HF_TOKEN")
-     if not hf_token:
-         raise ValueError("HF_TOKEN environment variable missing or empty - authentication required")

-     dataset_id = "latterworks/llama_checker_results"
-     logger.info(f"Initializing dataset access: {dataset_id}")

      try:
-         # First attempt: Try modern token parameter
-         try:
-             dataset = datasets.load_dataset(dataset_id, token=hf_token)
-         except TypeError:
-             # Fallback: Use legacy authentication parameter
-             logger.info("Attempting legacy authentication method")
-             dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)

-         # Extract the appropriate split
-         if isinstance(dataset, datasets.DatasetDict):
-             if "train" in dataset:
-                 return dataset["train"]
-             # No train split found, use first available
-             first_split = next(iter(dataset))
-             logger.info(f"No 'train' split found, using '{first_split}' split")
-             return dataset[first_split]
-         else:
-             # Handle direct Dataset object (no splits)
-             return dataset
-
      except FileNotFoundError:
-         logger.info(f"Dataset {dataset_id} not found - creating new dataset")
-
-         # Prepare empty dataset with precise schema
-         empty_dataset = datasets.Dataset.from_dict({
              "ip": [],
              "port": [],
              "country": [],
@@ -74,728 +159,450 @@ def load_or_create_dataset():
              "org": [],
              "models": []
          })

-         try:
-             # Create dataset on Hub with correct token parameter
-             empty_dataset.push_to_hub(dataset_id, token=hf_token)
-             logger.info(f"Successfully created empty dataset: {dataset_id}")
-
-             # Load the newly created dataset
-             try:
-                 dataset = datasets.load_dataset(dataset_id, token=hf_token)
-             except TypeError:
-                 dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
-
-             # Extract appropriate split
-             if isinstance(dataset, datasets.DatasetDict):
-                 if "train" in dataset:
-                     return dataset["train"]
-                 first_split = next(iter(dataset))
-                 logger.info(f"Using '{first_split}' split from newly created dataset")
-                 return dataset[first_split]
-             else:
-                 return dataset
-
-         except Exception as creation_error:
-             logger.error(f"Dataset creation failed: {creation_error}")
-             raise ValueError(f"Failed to create dataset: {creation_error}") from creation_error
-
-     except (ConnectionError, TimeoutError) as network_error:
-         logger.error(f"Network error accessing dataset: {network_error}")
-         raise ConnectionError(f"Network failure accessing HuggingFace Hub: {network_error}") from network_error
-
-     except Exception as general_error:
-         logger.error(f"Unexpected error accessing dataset: {general_error}")
-         raise ValueError(f"Dataset access failed: {general_error}") from general_error
 
- def scan_shodan(progress=gr.Progress()) -> List[Dict]:
      """
-     Scan Shodan for Ollama instances using search_cursor for comprehensive result retrieval.

      Args:
-         progress: Gradio progress bar for visual feedback

      Returns:
-         List of Ollama instances from Shodan with comprehensive metadata
      """
-     # API key fetch - no validation needed as it's centralized at startup
-     api_key = os.getenv("SHODAN_API_KEY")
-     shodan_query = os.getenv("SHODAN_QUERY", "product:Ollama port:11434")
-
-     api = shodan.Shodan(api_key)
-
-     try:
-         logger.info(f"Executing Shodan search_cursor with query: {shodan_query}")
-
-         # Use search_cursor to handle pagination automatically
-         cursor = api.search_cursor(shodan_query)
-
-         # Initialize scan metrics
-         instances = []
-         processed = 0
-         batch_size = 100  # Process results in batches for progress updates
-
-         progress(0, desc="Initializing Shodan data retrieval")
-
-         # Process all results from the cursor
-         results_batch = []
-         for result in cursor:
-             results_batch.append(result)
-             processed += 1
-
-             # Process in batches for efficiency
-             if len(results_batch) >= batch_size:
-                 progress(min(1.0, processed / (processed + 100)), desc=f"Retrieved {processed} Ollama instances")
-
-                 # Extract instance data from batch
-                 for result in results_batch:
-                     instances.append({
-                         'ip': result.get('ip_str'),
-                         'port': result.get('port', 11434),
-                         'country': result.get('location', {}).get('country_name'),
-                         'region': result.get('location', {}).get('region_name'),
-                         'org': result.get('org'),
-                         'models': []
-                     })
-                 results_batch = []
-
-         # Process any remaining results
-         if results_batch:
-             for result in results_batch:
-                 instances.append({
-                     'ip': result.get('ip_str'),
-                     'port': result.get('port', 11434),
-                     'country': result.get('location', {}).get('country_name'),
-                     'region': result.get('location', {}).get('region_name'),
-                     'org': result.get('org'),
-                     'models': []
-                 })
-
-         logger.info(f"Completed Shodan scan, retrieved {len(instances)} Ollama instances")
-         return instances
-
-     except shodan.APIError as e:
-         error_msg = str(e)
-         if "Invalid API key" in error_msg:
-             logger.error(f"Shodan authentication failed: Invalid API key")
-             raise ValueError("Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable.")
-         elif "Request rate limit reached" in error_msg:
-             logger.error(f"Shodan rate limit exceeded: {e}")
-             raise ValueError("Shodan API rate limit exceeded. Please wait before trying again.")
-         else:
-             logger.error(f"Shodan API error: {e}")
-             raise
-     except Exception as e:
-         logger.error(f"Unhandled exception during Shodan scan: {e}")
-         raise
-
192
- async def check_single_endpoint(session, instance):
193
- """Check a single Ollama endpoint for available models."""
194
- ip = instance['ip']
195
- port = instance['port']
196
  url = f"http://{ip}:{port}/api/tags"
197
 
198
  try:
199
- logger.info(f"Checking Ollama endpoint: {url}")
200
-
201
- # Set a timeout for the request
202
- async with session.get(url, timeout=5) as response:
203
- if response.status == 200:
204
- data = await response.json()
205
- models = data.get('models', [])
206
- logger.info(f"Found {len(models)} models at {url}")
207
- instance['models'] = models
208
- return instance
209
- else:
210
- logger.warning(f"Failed to get models from {url} - Status: {response.status}")
211
- return instance
212
  except asyncio.TimeoutError:
213
- logger.warning(f"Timeout connecting to {url}")
214
- return instance
215
  except Exception as e:
216
- logger.error(f"Error checking {url}: {e}")
217
- return instance
 
218
 
- async def check_ollama_endpoints(instances, progress=gr.Progress()):
      """
-     Efficiently check multiple Ollama endpoints with concurrent processing and comprehensive error handling.

      Args:
-         instances: List of Ollama instances from Shodan
-         progress: Gradio progress bar for visual feedback

      Returns:
-         List of Ollama instances with enriched model information
      """
-     if not instances:
-         logger.info("No instances to check - skipping endpoint verification")
-         return []

-     total_instances = len(instances)
-     logger.info(f"Initiating concurrent validation of {total_instances} Ollama endpoints")
-     progress(0, desc=f"Preparing to check {total_instances} Ollama endpoints")

-     # Configure optimized session with connection pooling and timeouts
-     conn = aiohttp.TCPConnector(limit=50, ttl_dns_cache=300)
-     timeout = aiohttp.ClientTimeout(total=30, connect=5, sock_connect=5, sock_read=20)

-     async with aiohttp.ClientSession(connector=conn, timeout=timeout) as session:
-         # Create task queue
-         tasks = [check_single_endpoint(session, instance) for instance in instances]
-
-         # Process with dynamic progress tracking
-         updated_instances = []
-         completed = 0
-
-         for future in asyncio.as_completed(tasks):
-             try:
-                 # Process completed task
-                 instance = await future
-                 updated_instances.append(instance)
-
-                 # Update progress with meaningful metrics
-                 completed += 1
-                 progress_pct = completed / total_instances
-                 progress(progress_pct, desc=f"Checked {completed}/{total_instances} endpoints ({progress_pct:.1%})")
-
-                 # Log models found
-                 if instance.get('models'):
-                     logger.info(f"Found {len(instance['models'])} models at {instance['ip']}:{instance['port']}")
-
-             except Exception as task_error:
-                 # Handle per-task errors without stopping the process
-                 logger.warning(f"Endpoint check failed: {task_error}")
-                 # Continue processing remaining endpoints
-
-     valid_instances = [i for i in updated_instances if i.get('models')]
-     logger.info(f"Endpoint validation complete: {len(valid_instances)}/{total_instances} accessible")
-     return updated_instances
- def update_dataset_with_instances(dataset, instances):
      """
-     Efficiently update HuggingFace dataset with optimized delta synchronization.
-
-     Implements single-pass dataset updates with:
-     1. Optimized in-memory index of existing entries
-     2. Differential detection of new vs. modified instances
-     3. Single hub push with consolidated changes

      Args:
-         dataset: HuggingFace dataset object to update
-         instances: List of Ollama instances with model information

      Returns:
-         Updated HuggingFace dataset with synchronized changes
      """
-     if not instances:
-         logger.warning("No instance data provided for dataset update operation")
-         return dataset
-
-     start_time = time.time()
-
-     # Optimization: Create indexed lookup of existing instances for O(1) access
-     dataset_dict = {}
-     for idx, item in enumerate(dataset):
-         key = f"{item['ip']}:{item['port']}"
-         dataset_dict[key] = {
-             'idx': idx,
-             'data': item
-         }
-
-     # Track modification metrics
-     stats = {
-         'new': 0,
-         'updated': 0,
-         'unchanged': 0,
-         'models_added': 0
-     }
-
-     # Process differentials
-     update_candidates = []
-     new_instances = []
-
-     for instance in instances:
-         # Skip instances without valid IP
-         if not instance.get('ip'):
-             continue
-
-         instance_key = f"{instance['ip']}:{instance['port']}"

-         if instance_key in dataset_dict:
-             # Existing instance - determine if update needed
-             existing = dataset_dict[instance_key]['data']
-             needs_update = False
-
-             # Check metadata changes
-             for field in ['country', 'region', 'org']:
-                 if instance.get(field) and instance.get(field) != existing.get(field):
-                     needs_update = True
-
-             # Check model changes - only update if models were found
-             if instance.get('models'):
-                 # Compare model signatures to detect changes
-                 existing_models = {model.get('name', ''): model for model in existing.get('models', [])}
-                 new_models = {model.get('name', ''): model for model in instance.get('models', [])}
-
-                 if set(new_models.keys()) != set(existing_models.keys()):
-                     needs_update = True
-                     stats['models_added'] += len(set(new_models.keys()) - set(existing_models.keys()))
-
-             if needs_update:
-                 # Create updated instance
-                 updated = dict(existing)
-                 updated.update({
-                     'country': instance.get('country', existing.get('country')),
-                     'region': instance.get('region', existing.get('region')),
-                     'org': instance.get('org', existing.get('org')),
-                 })
-
-                 # Only update models if they were found
-                 if instance.get('models'):
-                     updated['models'] = instance['models']
-
-                 update_candidates.append(updated)
-                 stats['updated'] += 1
-             else:
-                 stats['unchanged'] += 1
-         else:
-             # New instance
-             new_instances.append(instance)
-             stats['new'] += 1
-
-     # Efficiently construct updated dataset
-     if new_instances or update_candidates:
-         # Start with current dataset
-         current_data = dataset.to_list()

-         # Apply updates
-         for updated in update_candidates:
-             instance_key = f"{updated['ip']}:{updated['port']}"
-             idx = dataset_dict[instance_key]['idx']
-             current_data[idx] = updated

-         # Add new instances
-         current_data.extend(new_instances)

-         # Create updated dataset
-         updated_dataset = datasets.Dataset.from_list(current_data)

-         # Push to hub with single operation
-         hf_token = os.getenv("HF_TOKEN")
-         updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)

-         execution_time = time.time() - start_time
-         logger.info(f"Dataset synchronization complete in {execution_time:.2f}s: {stats['new']} new, {stats['updated']} updated, {stats['unchanged']} unchanged, {stats['models_added']} new models")

-         return updated_dataset
-     else:
-         logger.info("No dataset changes detected - skipping hub synchronization")
-         return dataset
- def get_unique_values(dataset):
      """
-     Get unique values for model attributes to populate dropdown filters.

      Args:
-         dataset: HuggingFace dataset

      Returns:
-         Dictionary with unique values for each attribute
      """
-     # Initialize empty sets
-     families = set()
-     parameter_sizes = set()
-
-     # Extract unique values from models
-     for instance in dataset:
-         for model in instance.get('models', []):
-             details = model.get('details', {})
-
-             # Handle both direct details in the model and nested details
-             if isinstance(details, dict):
-                 family = details.get('family')
-                 parameter_size = details.get('parameter_size')
-             else:
-                 family = model.get('family')
-                 parameter_size = model.get('parameter_size')
-
-             if family:
-                 families.add(family)
-
-             if parameter_size:
-                 parameter_sizes.add(parameter_size)

-     return {
-         'families': sorted(list(families)),
-         'parameter_sizes': sorted(list(parameter_sizes))
-     }
- def search_models(dataset, family=None, parameter_size=None, name_search=None, is_admin=False):
      """
      Search for models in the dataset based on filters.

      Args:
-         dataset: HuggingFace dataset
-         family: Filter by model family
-         parameter_size: Filter by parameter size
-         name_search: Filter by model name (substring match)
-         is_admin: Whether to include IP and port information

      Returns:
-         List of dictionaries with model information
      """
-     results = []
-
-     for instance in dataset:
-         ip = instance.get('ip')
-         port = instance.get('port')
-         country = instance.get('country')
-         region = instance.get('region')
-         org = instance.get('org')

-         for model in instance.get('models', []):
-             # Extract model details
-             model_name = model.get('name', '')
-
-             # Handle both direct details in the model and nested details
-             details = model.get('details', {})
-             if isinstance(details, dict):
-                 model_family = details.get('family', '')
-                 model_param_size = details.get('parameter_size', '')
-                 model_quant_level = details.get('quantization_level', '')
-             else:
-                 model_family = model.get('family', '')
-                 model_param_size = model.get('parameter_size', '')
-                 model_quant_level = model.get('quantization_level', '')
-
-             model_size_bytes = model.get('size', 0)
-             model_size_gb = model_size_bytes / (1024 * 1024 * 1024) if model_size_bytes else 0

              # Apply filters
-             if family and model_family != family:
                  continue
-
-             if parameter_size and model_param_size != parameter_size:
                  continue
-
-             if name_search and name_search.lower() not in model_name.lower():
                  continue

-             # Create result object
-             result = {
-                 'name': model_name,
-                 'family': model_family,
-                 'parameter_size': model_param_size,
-                 'quantization_level': model_quant_level,
-                 'size_gb': round(model_size_gb, 2),
-                 'country': country,
-                 'region': region,
-                 'org': org,
              }

-             # Include full model info for details view
-             result['full_model_info'] = json.dumps(model, indent=2)

-             # Include IP and port for admin users only
              if is_admin:
-                 result['ip'] = ip
-                 result['port'] = port

-             results.append(result)

-     return results
- def create_interface():
      """
-     Create enterprise-grade Gradio interface with optimized data loading and admin authentication.

      Returns:
-         gr.Blocks: Fully configured Gradio interface ready for deployment
      """
-     # Administrative authentication function
-     def validate_admin():
-         """Check if current user has admin privileges based on API key"""
-         # For production systems, this would use proper authentication
-         # Currently using API key presence as simple auth mechanism
-         admin_key = os.getenv("ADMIN_KEY", "")
-         shodan_key = os.getenv("SHODAN_API_KEY", "")
-         return bool(admin_key and shodan_key)
-
      try:
-         # Initialize critical data structures once at startup
-         logger.info("Initializing application data layer")
          dataset = load_or_create_dataset()

-         # Extract model metadata attributes for filtering
-         unique_values = get_unique_values(dataset)
-         logger.info(f"Loaded dataset with {len(unique_values['families'])} model families and {len(unique_values['parameter_sizes'])} parameter sizes")
-
-         # Preload initial model data
-         initial_results = search_models(dataset)
-         logger.info(f"Preloaded {len(initial_results)} models for initial display")
-
-         # Determine administrative access
-         is_admin = validate_admin()
-         admin_status = "enabled" if is_admin else "disabled"
-         logger.info(f"Administrative access: {admin_status}")
-
-         # Create interface with optimized structure
-         with gr.Blocks(
-             title="Ollama Instance Scanner",
-             theme=gr.themes.Soft(),
-             css=".footer {text-align: center; margin-top: 20px; color: #666;}"
-         ) as interface:
-             # Header section
-             with gr.Row():
-                 with gr.Column():
-                     gr.Markdown("# Ollama Instance Scanner")
-                     gr.Markdown("Browse publicly accessible Ollama models and their capabilities")
-
-             # Tab container
-             with gr.Tabs() as tabs:
-                 # Tab 1: Model Browser (Public)
-                 with gr.TabItem("Browse Models"):
-                     with gr.Row():
-                         # Filter controls
-                         with gr.Column(scale=1):
-                             with gr.Box():
-                                 gr.Markdown("### Search Filters")
-                                 family_dropdown = gr.Dropdown(
-                                     choices=["All"] + unique_values['families'],
-                                     value="All",
-                                     label="Model Family",
-                                     interactive=True
-                                 )
-                                 parameter_size_dropdown = gr.Dropdown(
-                                     choices=["All"] + unique_values['parameter_sizes'],
-                                     value="All",
-                                     label="Parameter Size",
-                                     interactive=True
-                                 )
-                                 name_search = gr.Textbox(
-                                     label="Model Name",
-                                     placeholder="Enter model name...",
-                                     interactive=True
-                                 )
-                                 search_button = gr.Button("Search Models", variant="primary")
-
-                     # Results section
-                     with gr.Row():
-                         # Model results table
-                         results_table = gr.DataFrame(
-                             value=initial_results,
-                             headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
-                             label="Available Models",
-                             interactive=False,
-                             wrap=True
                          )
-
-                     # Details section
-                     with gr.Row():
-                         # Model specifications panel
-                         model_details = gr.JSON(
-                             label="Model Specifications",
-                             visible=True
                          )

-                 # Tab 2: Shodan Scanner (Admin Only)
-                 with gr.TabItem("Shodan Scan", visible=is_admin):
-                     with gr.Box():
-                         gr.Markdown("## Ollama Instance Scanner")
-                         gr.Markdown("This tool scans for publicly accessible Ollama instances using Shodan API")
-
-                         # Scanner controls
-                         with gr.Row():
-                             shodan_scan_button = gr.Button(
-                                 "Start Shodan Scan",
-                                 variant="primary",
-                                 interactive=is_admin
-                             )
-
-                         # Status display
-                         with gr.Row():
-                             scan_status = gr.Textbox(
-                                 label="Scan Status",
-                                 value="Ready to scan" if is_admin else "Admin access required",
-                                 interactive=False
-                             )
-
-             # Footer
-             gr.Markdown(
-                 "### Ollama Instance Scanner | Powered by Shodan & Hugging Face",
-                 elem_classes=["footer"]
-             )
-
-             # Define optimized event handlers
-             def on_search_click(family, parameter_size, name_search):
-                 """Process model search with optimized filtering"""
-                 try:
-                     # Apply filters
-                     family_filter = None if family == "All" else family
-                     param_size_filter = None if parameter_size == "All" else parameter_size
-                     name_filter = None if not name_search else name_search.strip()
-
-                     # Execute search with admin privileges if available
-                     results = search_models(
-                         dataset,
-                         family_filter,
-                         param_size_filter,
-                         name_filter,
-                         is_admin
                      )
-
-                     logger.info(f"Search completed: {len(results)} models found matching criteria")
-                     return results
-                 except Exception as search_error:
-                     logger.error(f"Search failed: {search_error}")
-                     # Return empty results on error
-                     return []
-
-             def on_table_select(evt: gr.SelectData, results):
-                 """Handle table row selection with error protection"""
-                 try:
-                     if evt and evt.index and len(results) > evt.index[0]:
-                         selected_row = results[evt.index[0]]
-                         # Extract and return model details
-                         return selected_row.get('full_model_info', "{}")
-                     return "{}"
-                 except Exception as selection_error:
-                     logger.error(f"Selection error: {selection_error}")
-                     return "{}"
-
-             async def run_shodan_scan():
-                 """Execute Shodan scan workflow with comprehensive monitoring"""
-                 if not is_admin:
-                     return "Error: Administrative access required"

-                 scan_id = int(time.time())  # Generate unique scan identifier
-                 logger.info(f"Initiating Shodan scan {scan_id}")

-                 try:
-                     # Phase 1: Shodan API scan
-                     instances = scan_shodan()
-                     if not instances:
-                         return "Scan complete: No Ollama instances found"
-
-                     instance_count = len(instances)
-                     logger.info(f"Scan {scan_id}: Found {instance_count} potential Ollama instances")
-
-                     # Phase 2: Endpoint validation
-                     updated_instances = await check_ollama_endpoints(instances)
-                     accessible_count = sum(1 for i in updated_instances if i.get('models'))
-                     logger.info(f"Scan {scan_id}: Validated {accessible_count} accessible instances")
-
-                     # Phase 3: Dataset synchronization
-                     nonlocal dataset
-                     dataset = update_dataset_with_instances(dataset, updated_instances)
-
-                     # Phase 4: Interface update
-                     nonlocal unique_values
-                     unique_values = get_unique_values(dataset)
-
-                     # Update UI components with new data
-                     family_dropdown.choices = ["All"] + unique_values['families']
-                     parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']

-                     # Build detailed completion report
-                     report = (
-                         f"Scan {scan_id} completed successfully:\n"
-                         f"• {instance_count} total instances discovered\n"
-                         f"• {accessible_count} instances with accessible models\n"
-                         f"• {len(unique_values['families'])} unique model families\n"
-                         f"• {len(unique_values['parameter_sizes'])} parameter size variants"
                      )
-
-                     logger.info(f"Scan {scan_id} completed successfully")
-                     return report
-
-                 except Exception as scan_error:
-                     logger.error(f"Scan {scan_id} failed: {scan_error}")
-
-                     # Generate actionable error message
-                     if isinstance(scan_error, ValueError) and "API key" in str(scan_error):
-                         return "Error: Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable."
-                     elif isinstance(scan_error, ConnectionError):
-                         return "Error: Network connectivity issue. Please check your internet connection."
-                     else:
-                         return f"Error: Scan operation failed - {str(scan_error)}"
-
-             # Connect event handlers to UI components
-             search_button.click(
-                 fn=on_search_click,
-                 inputs=[family_dropdown, parameter_size_dropdown, name_search],
-                 outputs=[results_table]
-             )

-             results_table.select(
-                 fn=on_table_select,
-                 inputs=[results_table],
-                 outputs=[model_details]
-             )

-             shodan_scan_button.click(
-                 fn=run_shodan_scan,
-                 inputs=[],
-                 outputs=[scan_status]
-             )

-         logger.info("Gradio interface successfully initialized")
-         return interface
-
-     except Exception as interface_error:
-         logger.critical(f"Interface initialization failed: {interface_error}")
-         raise ValueError(f"Failed to create application interface: {interface_error}") from interface_error
-
- def validate_env_variables():
-     """
-     Centralized validation of critical environment variables with precise error messaging.
-
-     Raises:
-         ValueError: When any required environment variable is missing
-     """
-     required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
-     missing_vars = [var for var in required_vars if not os.getenv(var)]
-
-     if missing_vars:
-         error_msg = f"Missing critical environment variables: {', '.join(missing_vars)}"
-         logger.critical(error_msg)
-         raise ValueError(error_msg)
-
-     # Validate token quality
-     hf_token = os.getenv("HF_TOKEN")
-     if len(hf_token) < 8:  # Minimum length for plausible token
-         logger.warning("HF_TOKEN appears malformed (insufficient length)")

-     logger.info("Environment validation successful - all required variables present")
-
- def main():
-     """
-     Application entry point with centralized error handling and environment validation.
-     """
-     try:
-         # Validate environment once at startup
-         validate_env_variables()
-
-         # Initialize and launch interface
-         logger.info("Initializing Gradio interface")
-         interface = create_interface()
-
-         if interface:
-             logger.info("Starting Gradio server")
-             interface.launch()
-         else:
-             logger.critical("Interface initialization failed")
-             sys.exit(1)
-
-     except ValueError as config_error:
-         # Handle configuration errors
-         logger.critical(f"Configuration error: {config_error}")
-         sys.exit(1)
-
-     except Exception as fatal_error:
-         # Handle unexpected errors
-         logger.critical(f"Fatal application error: {fatal_error}")
-         sys.exit(1)
  if __name__ == "__main__":
-     main()
+ """
+ Ollama Instance & Model Scanner for Hugging Face Space
+
+ This application scans for publicly accessible Ollama instances, retrieves model information,
+ and provides a secure interface for browsing discovered models.
+
+ Security Architecture:
+ - Server-side authorization based on environment variables
+ - Strict input sanitization
+ - Comprehensive error handling
+ - Asynchronous endpoint checking
+ - Efficient dataset management
+ """
+
  import os
+ import re
+ import json
+ import asyncio
  import logging
+ import gradio as gr
  import shodan
  import aiohttp
+ from datasets import load_dataset, Dataset
+ from typing import Dict, List, Optional, Any, Tuple, Union
+ from datetime import datetime
+ from functools import wraps

+ # Configure logging
  logging.basicConfig(
      level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+     handlers=[logging.StreamHandler()]
  )
  logger = logging.getLogger(__name__)
+ # Security layer - Authorization functions
+
+ def authorization_required(func):
      """
+     Decorator that enforces server-side authorization for protected functions.
+     Authorization is determined by environment variables, not client parameters.
+
+     Args:
+         func: The function to protect with authorization

      Returns:
+         A wrapped function that performs authorization check
+     """
+     @wraps(func)
+     def wrapper(*args, **kwargs):
+         if not verify_admin_authorization():
+             logger.warning(f"Unauthorized access attempt to {func.__name__}")
+             return {"error": "Unauthorized access"} if kwargs.get("return_error", False) else None
+         return func(*args, **kwargs)
+     return wrapper
+
+ def verify_admin_authorization() -> bool:
+     """
+     Perform server-side verification of admin authorization.
+     Authorization is based on environment variables, not client data.
+
+     Returns:
+         bool: True if valid admin credentials exist
+     """
+     try:
+         # Check for the existence of the Shodan API key
+         api_key = os.getenv("SHODAN_API_KEY")
+         hf_token = os.getenv("HF_TOKEN")
+
+         return (api_key is not None and
+                 len(api_key.strip()) > 10 and
+                 hf_token is not None and
+                 len(hf_token.strip()) > 10)
+     except Exception as e:
+         logger.error(f"Error verifying admin authorization: {str(e)}")
+         return False
+
+ # Security layer - Input validation
+
+ def sanitize_input(input_string: str) -> str:
+     """
+     Sanitize user input to prevent injection attacks.
+
+     Args:
+         input_string: User input string to sanitize
+
+     Returns:
+         str: Sanitized string
+     """
+     if not isinstance(input_string, str):
+         return ""
+
+     # Remove potentially harmful characters
+     sanitized = re.sub(r'[^\w\s\-\.]', '', input_string)
+     # Limit length to prevent DoS
+     return sanitized[:100]
+
+ def get_env_variables() -> Dict[str, str]:
+     """
+     Get all required environment variables.
+
+     Returns:
+         Dict[str, str]: Dictionary containing environment variables

      Raises:
+         ValueError: If any required environment variable is missing
      """
+     env_vars = {
+         "SHODAN_API_KEY": os.getenv("SHODAN_API_KEY"),
+         "SHODAN_QUERY": os.getenv("SHODAN_QUERY", "product:Ollama port:11434"),
+         "HF_TOKEN": os.getenv("HF_TOKEN")
+     }
+
+     missing_vars = [name for name, value in env_vars.items() if not value]
+     if missing_vars:
+         error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
+         logger.error(error_msg)
+         raise ValueError(error_msg)

+     return env_vars
+
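As a quick sanity check of the sanitization rule above, a minimal sketch: the regex keeps word characters, whitespace, hyphens, and dots, drops everything else, and truncates to 100 characters, so shell metacharacters are stripped while ordinary model names pass through unchanged.

    import re

    def sanitize_input(input_string: str) -> str:
        if not isinstance(input_string, str):
            return ""
        sanitized = re.sub(r'[^\w\s\-\.]', '', input_string)
        return sanitized[:100]

    assert sanitize_input("llama2; rm -rf /") == "llama2 rm -rf "  # ';' and '/' removed
    assert sanitize_input("mistral-7b.Q4") == "mistral-7b.Q4"      # hyphens and dots survive
    assert sanitize_input(12345) == ""                             # non-strings rejected outright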
+ # Data access layer
+
+ def load_or_create_dataset() -> Dataset:
+     """
+     Load the dataset from Hugging Face Hub or create it if it doesn't exist.

+     Returns:
+         Dataset: Loaded or created dataset
+
+     Raises:
+         Exception: If dataset loading or creation fails
+     """
      try:
+         # Attempt to get environment variables - this will raise ValueError if missing
+         env_vars = get_env_variables()

+         logger.info("Attempting to load dataset from Hugging Face Hub")
+         dataset = load_dataset("latterworks/llama_checker_results", use_auth_token=env_vars["HF_TOKEN"])
+         dataset = dataset['train']
+         logger.info(f"Successfully loaded dataset with {len(dataset)} entries")
+         return dataset
+     except ValueError as e:
+         # Re-raise environment variable errors
+         raise
      except FileNotFoundError:
+         # Only create dataset if admin authorization is verified
+         if not verify_admin_authorization():
+             logger.error("Unauthorized attempt to create dataset")
+             raise ValueError("Unauthorized: Only admins can create the dataset")
+
+         logger.info("Dataset not found, creating a new one")
+         env_vars = get_env_variables()
+         dataset = Dataset.from_dict({
              "ip": [],
              "port": [],
              "country": [],
              "org": [],
              "models": []
          })
+         dataset.push_to_hub("latterworks/llama_checker_results", token=env_vars["HF_TOKEN"])
+         logger.info("Created and pushed empty dataset to Hugging Face Hub")

+         # Reload the dataset to ensure consistency
+         dataset = load_dataset("latterworks/llama_checker_results", use_auth_token=env_vars["HF_TOKEN"])['train']
+         return dataset
+     except Exception as e:
+         error_msg = f"Failed to load or create dataset: {str(e)}"
+         logger.error(error_msg)
+         raise
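A standalone load of the same dataset might look like the following sketch (assumes HF_TOKEN is exported). Note that `token=` is the current parameter name in the `datasets` library; `use_auth_token=`, used above, is the legacy spelling that the removed code already fell back to.

    import os
    from datasets import load_dataset

    ds = load_dataset(
        "latterworks/llama_checker_results",
        token=os.environ["HF_TOKEN"],
    )["train"]
    print(ds.features)  # expected columns: ip, port, country, region, org, models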
 
+ async def check_single_endpoint(ip: str, port: int, timeout: int = 5) -> Optional[List[Dict[str, Any]]]:
      """
+     Check a single Ollama endpoint for available models.

      Args:
+         ip: IP address of the Ollama instance
+         port: Port number of the Ollama instance
+         timeout: Timeout in seconds for the HTTP request

      Returns:
+         Optional[List[Dict[str, Any]]]: List of model information dictionaries, or None if endpoint check fails
      """
      url = f"http://{ip}:{port}/api/tags"

      try:
+         async with aiohttp.ClientSession() as session:
+             async with session.get(url, timeout=timeout) as response:
+                 if response.status == 200:
+                     data = await response.json()
+                     if "models" in data and isinstance(data["models"], list):
+                         logger.info(f"Successfully retrieved {len(data['models'])} models from {ip}:{port}")
+                         return data["models"]
+                     else:
+                         logger.warning(f"Unexpected response format from {ip}:{port}")
+                 else:
+                     logger.warning(f"Received status code {response.status} from {ip}:{port}")
+     except aiohttp.ClientError as e:
+         logger.warning(f"Connection error for {ip}:{port}: {str(e)}")
      except asyncio.TimeoutError:
+         logger.warning(f"Connection timeout for {ip}:{port}")
      except Exception as e:
+         logger.warning(f"Unexpected error checking {ip}:{port}: {str(e)}")
+
+     return None
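Outside Gradio, this coroutine can be driven directly with asyncio. A minimal sketch, using a documentation-only TEST-NET-3 address as a placeholder host:

    import asyncio

    # 203.0.113.7 is reserved for documentation; substitute a real host to test.
    models = asyncio.run(check_single_endpoint("203.0.113.7", 11434, timeout=5))
    if models is None:
        print("endpoint unreachable, timed out, or returned an unexpected payload")
    else:
        print(f"{len(models)} models advertised")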
 
+ @authorization_required
+ async def check_ollama_endpoints(dataset: Dataset, progress: Optional[gr.Progress] = None) -> Dataset:
      """
+     Check all Ollama endpoints in the dataset for available models.
+     Requires admin authorization.

      Args:
+         dataset: Dataset containing Ollama endpoints
+         progress: Optional Gradio progress bar

      Returns:
+         Dataset: Updated dataset with model information
      """
+     if progress:
+         progress(0, desc="Preparing to check endpoints...")
+
+     # Build a list of tasks to execute
+     total_endpoints = len(dataset)
+     tasks = []
+
+     for i, item in enumerate(dataset):
+         ip = item["ip"]
+         port = item["port"]
+         tasks.append(check_single_endpoint(ip, port))
+
+     # Execute tasks in batches to avoid overwhelming resources
+     batch_size = 10
+     updated_dataset = dataset.copy()
+
+     for i in range(0, len(tasks), batch_size):
+         if progress:
+             progress(i / len(tasks), desc=f"Checking endpoints {i+1}-{min(i+batch_size, len(tasks))} of {len(tasks)}...")
+
+         batch_tasks = tasks[i:i+batch_size]
+         batch_results = await asyncio.gather(*batch_tasks)
+
+         for j, result in enumerate(batch_results):
+             idx = i + j
+             if idx < len(dataset):
+                 if result:
+                     updated_dataset = updated_dataset.add_item({
+                         "ip": dataset[idx]["ip"],
+                         "port": dataset[idx]["port"],
+                         "country": dataset[idx]["country"],
+                         "region": dataset[idx]["region"],
+                         "org": dataset[idx]["org"],
+                         "models": result
+                     })

+     if progress:
+         progress(1.0, desc="Endpoint checking complete!")

+     logger.info(f"Checked {total_endpoints} endpoints, found models on {sum(1 for item in updated_dataset if item['models'])} endpoints")

+     # Push updated dataset to Hugging Face Hub
+     env_vars = get_env_variables()
+     updated_dataset.push_to_hub("latterworks/llama_checker_results", token=env_vars["HF_TOKEN"])
+     logger.info("Successfully pushed updated dataset to Hugging Face Hub")
+
+     return updated_dataset
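The batching loop above reduces to the following reusable pattern (a sketch, assuming `coros` is any list of coroutines): await at most `batch_size` requests concurrently, so a large dataset never opens hundreds of sockets at once.

    import asyncio

    async def gather_in_batches(coros, batch_size=10):
        # Await the coroutines batch_size at a time, preserving result order.
        results = []
        for i in range(0, len(coros), batch_size):
            results.extend(await asyncio.gather(*coros[i:i + batch_size]))
        return results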
 
+ @authorization_required
+ def scan_shodan(progress: Optional[gr.Progress] = None) -> str:
      """
+     Scan Shodan for Ollama instances and update the dataset.
+     Requires admin authorization.

      Args:
+         progress: Optional Gradio progress bar

      Returns:
+         str: Status message
      """
+     try:
+         # Get environment variables
+         env_vars = get_env_variables()

+         # Load dataset
+         dataset = load_or_create_dataset()

+         # Initialize Shodan API client
+         api = shodan.Shodan(env_vars["SHODAN_API_KEY"])
+         query = env_vars["SHODAN_QUERY"]

+         if progress:
+             progress(0, desc="Starting Shodan search...")

+         # Get total results count
+         count_result = api.count(query)
+         total_results = count_result.get('total', 0)

+         if total_results == 0:
+             return "No Ollama instances found on Shodan."

+         logger.info(f"Found {total_results} potential Ollama instances on Shodan")

+         # Search Shodan
+         new_instances = []
+         results_processed = 0
+
+         for result in api.search_cursor(query):
+             results_processed += 1
+
+             if progress:
+                 progress(results_processed / total_results,
+                          desc=f"Processing Shodan result {results_processed}/{total_results}")
+
+             ip = result.get('ip_str')
+             port = result.get('port', 11434)
+
+             # Skip if instance already exists in dataset
+             if any(item["ip"] == ip and item["port"] == port for item in dataset):
+                 continue
+
+             # Extract location information
+             country = result.get('location', {}).get('country_name', '')
+             region = result.get('location', {}).get('region_name', '')
+             org = result.get('org', '')
+
+             new_instances.append({
+                 "ip": ip,
+                 "port": port,
+                 "country": country,
+                 "region": region,
+                 "org": org,
+                 "models": []
+             })
+
+         if progress:
+             progress(1.0, desc="Shodan search complete!")
+
+         # Add new instances to dataset
+         updated_dataset = dataset.copy()
+         for instance in new_instances:
+             updated_dataset = updated_dataset.add_item(instance)
+
+         logger.info(f"Added {len(new_instances)} new instances to dataset")
+
+         # Check Ollama endpoints asynchronously
+         if new_instances:
+             loop = asyncio.new_event_loop()
+             asyncio.set_event_loop(loop)
+             updated_dataset = loop.run_until_complete(check_ollama_endpoints(updated_dataset, progress))
+             loop.close()
+
+         status_message = f"Scan complete! Found {len(new_instances)} new Ollama instances."
+         return status_message
+
+     except shodan.APIError as e:
+         error_msg = f"Shodan API error: {str(e)}"
+         logger.error(error_msg)
+         return error_msg
+     except Exception as e:
+         error_msg = f"Error during Shodan scan: {str(e)}"
+         logger.error(error_msg)
+         return error_msg
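Because the `authorization_required` wrapper returns None rather than raising when the environment checks fail, a direct invocation looks like this sketch (assumes SHODAN_API_KEY and HF_TOKEN are exported; SHODAN_QUERY is optional and defaults as above):

    import os

    os.environ.setdefault("SHODAN_QUERY", "product:Ollama port:11434")

    status = scan_shodan()  # returns a status string, or None if unauthorized
    print(status or "authorization failed: check SHODAN_API_KEY / HF_TOKEN")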
 
+ def get_unique_values(dataset: Dataset, field: str) -> List[str]:
      """
+     Get unique values for a specific field in the dataset.

      Args:
+         dataset: Dataset to extract values from
+         field: Field name to extract values from

      Returns:
+         List[str]: List of unique values
      """
+     unique_values = set()

+     if field == "family" or field == "parameter_size" or field == "quantization_level":
+         for item in dataset:
+             models = item.get("models", [])
+             if not models:
+                 continue
+
+             for model in models:
+                 details = model.get("details", {})
+                 if details and field in details:
+                     value = details.get(field)
+                     if value:
+                         unique_values.add(value)
+
+     return sorted(list(unique_values))
 
+ def search_models(dataset: Dataset, name_search: str = "", family: str = "", parameter_size: str = "") -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
      """
      Search for models in the dataset based on filters.
+     Authorization is determined server-side.

      Args:
+         dataset: Dataset to search
+         name_search: Model name search string
+         family: Model family filter
+         parameter_size: Parameter size filter

      Returns:
+         Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: Filtered model list and detailed model list
      """
+     # Server-side authorization check
+     is_admin = verify_admin_authorization()
+
+     name_search = sanitize_input(name_search).lower()
+     family = sanitize_input(family)
+     parameter_size = sanitize_input(parameter_size)
+
+     filtered_models = []
+     detailed_models = []
+
+     for item in dataset:
+         models = item.get("models", [])
+         if not models:
+             continue

+         ip = item.get("ip", "")
+         port = item.get("port", 0)
+         country = item.get("country", "")
+         region = item.get("region", "")
+         org = item.get("org", "")
+
+         for model in models:
+             model_name = model.get("name", "").lower()
+             details = model.get("details", {})
+             model_family = details.get("family", "")
+             model_parameter_size = details.get("parameter_size", "")
+             model_quantization = details.get("quantization_level", "")
+             model_size = model.get("size", 0)
+             model_size_gb = round(model_size / (1024**3), 2) if model_size else 0

              # Apply filters
+             if name_search and name_search not in model_name:
                  continue
+             if family and family != model_family:
                  continue
+             if parameter_size and parameter_size != model_parameter_size:
                  continue

+             # Prepare filtered model entry
+             filtered_model = {
+                 "name": model.get("name", ""),
+                 "family": model_family,
+                 "parameter_size": model_parameter_size,
+                 "quantization_level": model_quantization,
+                 "size_gb": model_size_gb
              }

+             # Add IP and port information only for admins - server-side check
+             if is_admin:
+                 filtered_model["ip"] = ip
+                 filtered_model["port"] = port
+
+             filtered_models.append(filtered_model)

+             # Prepare detailed model entry
+             detailed_model = {
+                 "name": model.get("name", ""),
+                 "family": model_family,
+                 "parameter_size": model_parameter_size,
+                 "quantization_level": model_quantization,
+                 "size_gb": model_size_gb,
+                 "digest": model.get("digest", ""),
+                 "modified_at": model.get("modified_at", ""),
+                 "country": country,
+                 "region": region,
+                 "org": org
+             }
+
+             # Add IP and port information only for admins - server-side check
              if is_admin:
+                 detailed_model["ip"] = ip
+                 detailed_model["port"] = port

+             detailed_models.append(detailed_model)

+     return filtered_models, detailed_models
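A quick way to exercise the search path from a Python shell, sketched under the assumption that the required environment variables are set so the dataset loads:

    dataset = load_or_create_dataset()

    # Both returned lists are row dicts; the detailed list additionally carries
    # digest, modified_at, and location fields.
    rows, details = search_models(dataset, name_search="llama", parameter_size="7B")
    print(f"{len(rows)} models match")
    if details:
        print(details[0])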
 
+ def create_ui() -> gr.Blocks:
      """
+     Create the Gradio user interface with server-side authorization.

      Returns:
+         gr.Blocks: Gradio interface
      """
+     # Load dataset
      try:
          dataset = load_or_create_dataset()
+     except Exception as e:
+         # Fallback to empty dataset if loading fails
+         logger.error(f"Failed to load dataset: {str(e)}")
+         dataset = Dataset.from_dict({
+             "ip": [],
+             "port": [],
+             "country": [],
+             "region": [],
+             "org": [],
+             "models": []
+         })
+
+     # Server-side authorization check
+     is_admin = verify_admin_authorization()
+
+     # Get unique values for dropdowns
+     families = [""] + get_unique_values(dataset, "family")
+     parameter_sizes = [""] + get_unique_values(dataset, "parameter_size")
+
+     # Initial search results
+     initial_results, initial_details = search_models(dataset)
+
+     with gr.Blocks(title="Ollama Instance & Model Browser") as app:
+         gr.Markdown("# Ollama Instance & Model Browser")

+         with gr.Tabs() as tabs:
+             with gr.Tab("Browse Models"):
+                 with gr.Row():
+                     with gr.Column(scale=1):
+                         name_search = gr.Textbox(label="Model Name Search")
+                         family_dropdown = gr.Dropdown(
+                             choices=families,
+                             label="Model Family",
+                             value=""
                          )
+                         parameter_size_dropdown = gr.Dropdown(
+                             choices=parameter_sizes,
+                             label="Parameter Size",
+                             value=""
                          )
+                         search_button = gr.Button("Search Models")

+                 with gr.Row():
+                     model_results = gr.DataFrame(
+                         value=initial_results,
+                         label="Model Results",
+                         interactive=False
                      )

+                 with gr.Row():
+                     model_details = gr.JSON(label="Model Details")

+                 def search_callback(name, family, parameter_size):
+                     results, details = search_models(dataset, name, family, parameter_size)
+                     return results, None
+
+                 def select_model(evt: gr.SelectData):
+                     results, details = search_models(dataset, name_search.value,
+                                                      family_dropdown.value,
+                                                      parameter_size_dropdown.value)
+                     if evt.index[0] < len(details):
+                         return details[evt.index[0]]
+                     return None
+
+                 search_button.click(
+                     search_callback,
+                     inputs=[name_search, family_dropdown, parameter_size_dropdown],
+                     outputs=[model_results, model_details]
+                 )
+
+                 model_results.select(
+                     select_model,
+                     None,
+                     model_details
+                 )
+
+             # Only show Shodan Scan tab for admins - server-side check
+             if is_admin:
+                 with gr.Tab("Shodan Scan"):
+                     gr.Markdown("## Scan for Ollama Instances")
+                     gr.Markdown("**Note:** This scan will update the dataset with new Ollama instances.")
+                     scan_button = gr.Button("Start Scan")
+                     scan_output = gr.Textbox(label="Scan Status")

+                     scan_button.click(
+                         lambda progress=gr.Progress(): scan_shodan(progress),
+                         outputs=scan_output
                      )

+         # Refresh dataset when the app starts
+         def refresh_data():
+             nonlocal dataset
+             try:
+                 dataset = load_or_create_dataset()
+             except Exception as e:
+                 logger.error(f"Failed to refresh dataset: {str(e)}")
+                 # Continue with existing dataset

+             results, details = search_models(dataset)
+             return results

+         app.load(
+             fn=refresh_data,
+             outputs=model_results
+         )

+     return app
 
+ # Main entry point
  if __name__ == "__main__":
+     try:
+         ui = create_ui()
+         ui.launch()
+     except Exception as e:
+         logger.critical(f"Failed to start application: {str(e)}")
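In a Hugging Face Space this guard is the entire launch path, since app.py is executed directly. For a local smoke test, explicit server settings can be passed to launch; a sketch using Gradio's standard options and the conventional Spaces port:

    # Bind to all interfaces on port 7860 (the port Spaces expects).
    ui = create_ui()
    ui.launch(server_name="0.0.0.0", server_port=7860)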