latterworks committed
Commit e397d59 · verified · 1 Parent(s): 58a3354

Update app.py

Files changed (1)
  1. app.py +440 -165
app.py CHANGED
@@ -1,4 +1,6 @@
  import os
  import logging
  import datasets
  import shodan
@@ -6,39 +8,64 @@ import asyncio
  import aiohttp
  import json
  import gradio as gr
- from typing import List, Dict, Any, Optional

- # Configure logging
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)

- def validate_env_variables():
-     """Validate that required environment variables are set."""
-     required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
-     missing_vars = [var for var in required_vars if not os.getenv(var)]
-     if missing_vars:
-         raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
-
  def load_or_create_dataset():
-     """Load the dataset from HuggingFace or create it if it doesn't exist."""
-     validate_env_variables()
      hf_token = os.getenv("HF_TOKEN")

      try:
-         logger.info("Attempting to load dataset from HuggingFace Hub")
-         dataset = datasets.load_dataset(
-             "latterworks/llama_checker_results",
-             use_auth_token=hf_token
-         )
-         if "train" in dataset:
-             return dataset["train"]
          else:
-             # If there's no train split, just take the first available split
-             return dataset[next(iter(dataset))]

      except FileNotFoundError:
-         logger.info("Dataset not found, creating a new one")
-         # Create an empty dataset with the required schema
          empty_dataset = datasets.Dataset.from_dict({
              "ip": [],
              "port": [],
@@ -48,26 +75,38 @@ def load_or_create_dataset():
              "models": []
          })

-         # Push the empty dataset to HuggingFace Hub
-         empty_dataset.push_to_hub(
-             "latterworks/llama_checker_results",
-             token=hf_token
-         )
-
-         # Load the newly created dataset
-         dataset = datasets.load_dataset(
-             "latterworks/llama_checker_results",
-             use_auth_token=hf_token
-         )
-
-         if "train" in dataset:
-             return dataset["train"]
-         else:
-             return dataset[next(iter(dataset))]

-     except Exception as e:
-         logger.error(f"Failed to load or create dataset: {e}")
-         raise

  def scan_shodan(progress=gr.Progress()) -> List[Dict]:
      """
@@ -179,89 +218,179 @@ async def check_single_endpoint(session, instance):

  async def check_ollama_endpoints(instances, progress=gr.Progress()):
      """
-     Check multiple Ollama endpoints for available models.

      Args:
          instances: List of Ollama instances from Shodan
-         progress: Gradio progress bar

      Returns:
-         List of Ollama instances with model information
      """
      if not instances:
          return []
-
-     progress(0, desc="Checking Ollama endpoints")

-     # Set up async HTTP session
-     async with aiohttp.ClientSession() as session:
-         tasks = []
-         for instance in instances:
-             task = check_single_endpoint(session, instance)
-             tasks.append(task)

-         # Process tasks with progress updates
          updated_instances = []
-         for i, task in enumerate(asyncio.as_completed(tasks)):
-             progress((i + 1) / len(tasks), desc=f"Checking endpoint {i + 1}/{len(tasks)}")
-             instance = await task
-             updated_instances.append(instance)

      return updated_instances

  def update_dataset_with_instances(dataset, instances):
      """
-     Update the HuggingFace dataset with new Ollama instances.

      Args:
-         dataset: HuggingFace dataset
          instances: List of Ollama instances with model information

      Returns:
-         Updated HuggingFace dataset
      """
      if not instances:
-         logger.warning("No instances to update in dataset")
          return dataset
-
-     # Convert dataset to list of dictionaries for easier manipulation
-     dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset.to_list()}

-     # Process each instance
-     updates_count = 0
      new_instances = []

      for instance in instances:
          instance_key = f"{instance['ip']}:{instance['port']}"

          if instance_key in dataset_dict:
-             # Update existing instance
-             dataset_dict[instance_key]['country'] = instance.get('country', dataset_dict[instance_key].get('country'))
-             dataset_dict[instance_key]['region'] = instance.get('region', dataset_dict[instance_key].get('region'))
-             dataset_dict[instance_key]['org'] = instance.get('org', dataset_dict[instance_key].get('org'))

-             # Only update models if they were found
              if instance.get('models'):
-                 dataset_dict[instance_key]['models'] = instance['models']

-             updates_count += 1
          else:
-             # Add new instance
              new_instances.append(instance)

-     # Create updated dataset list
-     updated_dataset_list = list(dataset_dict.values()) + new_instances
-
-     # Create a new dataset from the updated list
-     updated_dataset = datasets.Dataset.from_list(updated_dataset_list)
-
-     # Push updated dataset to HuggingFace Hub
-     hf_token = os.getenv("HF_TOKEN")
-     updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
-
-     logger.info(f"Updated {updates_count} existing instances and added {len(new_instances)} new instances to dataset")
-
-     return updated_dataset

  def get_unique_values(dataset):
      """
@@ -377,150 +506,296 @@ def search_models(dataset, family=None, parameter_size=None, name_search=None, i
      return results

  def create_interface():
-     """Create the Gradio interface for the application."""
      try:
-         # Load dataset once at startup
          dataset = load_or_create_dataset()

-         # Get unique values for dropdowns once at startup
          unique_values = get_unique_values(dataset)

-         # Get all models to display on initial load
          initial_results = search_models(dataset)

-         # Create Gradio interface
-         with gr.Blocks(title="Ollama Instance Scanner") as interface:
-             gr.Markdown("# Ollama Instance Scanner")
-             gr.Markdown("Browse publicly accessible Ollama instances and their models")

              with gr.Tabs() as tabs:
-                 # Browse Models Tab
                  with gr.TabItem("Browse Models"):
                      with gr.Row():
                          with gr.Column(scale=1):
-                             family_dropdown = gr.Dropdown(
-                                 choices=["All"] + unique_values['families'],
-                                 value="All",
-                                 label="Filter by Family"
-                             )
-                             parameter_size_dropdown = gr.Dropdown(
-                                 choices=["All"] + unique_values['parameter_sizes'],
-                                 value="All",
-                                 label="Filter by Parameter Size"
-                             )
-                             name_search = gr.Textbox(
-                                 label="Search by Name",
-                                 placeholder="Enter model name..."
-                             )
-                             search_button = gr.Button("Search")

                      with gr.Row():
                          results_table = gr.DataFrame(
                              value=initial_results,
                              headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
-                             label="Search Results"
                          )

                      with gr.Row():
-                         model_details = gr.JSON(label="Model Details")

-                 # Shodan Scan Tab (Admin only)
-                 with gr.TabItem("Shodan Scan (Admin Only)"):
-                     gr.Markdown("## Shodan Scan")
-                     gr.Markdown("This tab allows scanning for Ollama instances using Shodan. You need a valid Shodan API key set as an environment variable.")
-
-                     shodan_scan_button = gr.Button("Start Shodan Scan")
-                     scan_status = gr.Textbox(label="Scan Status", interactive=False)

-             # Define event handlers
              def on_search_click(family, parameter_size, name_search):
-                 # Use "All" as a signal not to filter
-                 family_filter = None if family == "All" else family
-                 param_size_filter = None if parameter_size == "All" else parameter_size
-                 name_filter = None if not name_search else name_search
-
-                 # Check if admin mode is enabled (would need to implement proper authentication)
-                 is_admin = False # This should be based on proper authentication
-
-                 # Search for models
-                 results = search_models(dataset, family_filter, param_size_filter, name_filter, is_admin)
-
-                 # Return results
-                 return results

              def on_table_select(evt: gr.SelectData, results):
-                 if evt.index[0] < len(results):
-                     selected_row = results[evt.index[0]]
-                     return selected_row.get('full_model_info', {})
-                 return {}

              async def run_shodan_scan():
                  try:
-                     # Verify Shodan API Key exists
-                     if not os.getenv("SHODAN_API_KEY"):
-                         return "Error: SHODAN_API_KEY environment variable is not set."
-
-                     # Perform Shodan scan
                      instances = scan_shodan()
-
                      if not instances:
-                         return "No Ollama instances found in Shodan scan."

-                     # Check Ollama endpoints
                      updated_instances = await check_ollama_endpoints(instances)

-                     # Update dataset
                      nonlocal dataset
                      dataset = update_dataset_with_instances(dataset, updated_instances)

-                     # Update unique values
                      nonlocal unique_values
                      unique_values = get_unique_values(dataset)

-                     # Update dropdown choices
                      family_dropdown.choices = ["All"] + unique_values['families']
                      parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']

-                     return f"Scan completed successfully. Found {len(instances)} instances, {sum(1 for i in updated_instances if i.get('models'))} with accessible models."
-                 except Exception as e:
-                     logger.error(f"Error in Shodan scan: {e}")
-                     return f"Error: {str(e)}"

-             # Connect event handlers
              search_button.click(
-                 on_search_click,
                  inputs=[family_dropdown, parameter_size_dropdown, name_search],
                  outputs=[results_table]
              )

              results_table.select(
-                 on_table_select,
                  inputs=[results_table],
                  outputs=[model_details]
              )

              shodan_scan_button.click(
-                 run_shodan_scan,
                  inputs=[],
                  outputs=[scan_status]
              )

              return interface

-     except Exception as e:
-         logger.error(f"Failed to create Gradio interface: {e}")
-         raise

  def main():
-     """Main function to run the application."""
      try:
          interface = create_interface()
          if interface:
              interface.launch()
          else:
-             logger.error("Failed to create interface")
-     except Exception as e:
-         logger.error(f"Application failed: {e}")

  if __name__ == "__main__":
      main()
 
  import os
+ import sys
+ import time
  import logging
  import datasets
  import shodan

  import aiohttp
  import json
  import gradio as gr
+ from typing import List, Dict, Any, Optional, Tuple, Set, Union
+ from concurrent.futures import ThreadPoolExecutor

+ # Configure production-grade logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] - %(message)s',
+     handlers=[
+         logging.StreamHandler(),
+         logging.FileHandler("ollama_scanner.log")
+     ]
+ )
  logger = logging.getLogger(__name__)

  def load_or_create_dataset():
+     """
+     Load dataset from HuggingFace with optimized error handling and authentication.
+
+     Returns:
+         Dataset: The loaded dataset object ready for query operations
+
+     Raises:
+         ValueError: When authentication fails or dataset structure is invalid
+         ConnectionError: When network issues prevent dataset access
+     """
+     # HF token must exist for private dataset access
      hf_token = os.getenv("HF_TOKEN")
+     if not hf_token:
+         raise ValueError("HF_TOKEN environment variable missing or empty - authentication required")
+
+     dataset_id = "latterworks/llama_checker_results"
+     logger.info(f"Initializing dataset access: {dataset_id}")

      try:
+         # First attempt: Try modern token parameter
+         try:
+             dataset = datasets.load_dataset(dataset_id, token=hf_token)
+         except TypeError:
+             # Fallback: Use legacy authentication parameter
+             logger.info("Attempting legacy authentication method")
+             dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+         # Extract the appropriate split
+         if isinstance(dataset, datasets.DatasetDict):
+             if "train" in dataset:
+                 return dataset["train"]
+             # No train split found, use first available
+             first_split = next(iter(dataset))
+             logger.info(f"No 'train' split found, using '{first_split}' split")
+             return dataset[first_split]
          else:
+             # Handle direct Dataset object (no splits)
+             return dataset

      except FileNotFoundError:
+         logger.info(f"Dataset {dataset_id} not found - creating new dataset")
+
+         # Prepare empty dataset with precise schema
          empty_dataset = datasets.Dataset.from_dict({
              "ip": [],
              "port": [],

              "models": []
          })

+         try:
+             # Create dataset on Hub with correct token parameter
+             empty_dataset.push_to_hub(dataset_id, token=hf_token)
+             logger.info(f"Successfully created empty dataset: {dataset_id}")

+             # Load the newly created dataset
+             try:
+                 dataset = datasets.load_dataset(dataset_id, token=hf_token)
+             except TypeError:
+                 dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+             # Extract appropriate split
+             if isinstance(dataset, datasets.DatasetDict):
+                 if "train" in dataset:
+                     return dataset["train"]
+                 first_split = next(iter(dataset))
+                 logger.info(f"Using '{first_split}' split from newly created dataset")
+                 return dataset[first_split]
+             else:
+                 return dataset
+
+         except Exception as creation_error:
+             logger.error(f"Dataset creation failed: {creation_error}")
+             raise ValueError(f"Failed to create dataset: {creation_error}") from creation_error
+
+     except (ConnectionError, TimeoutError) as network_error:
+         logger.error(f"Network error accessing dataset: {network_error}")
+         raise ConnectionError(f"Network failure accessing HuggingFace Hub: {network_error}") from network_error
+
+     except Exception as general_error:
+         logger.error(f"Unexpected error accessing dataset: {general_error}")
+         raise ValueError(f"Dataset access failed: {general_error}") from general_error
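The nested try/except TypeError above exists because newer releases of the datasets library accept token= while older ones only understand use_auth_token=. A minimal standalone sketch of the same fallback, offered only as an illustration (the dataset id is a placeholder, and it assumes an unsupported keyword surfaces as TypeError, as the new code expects):

import os
import datasets

def load_with_token_fallback(dataset_id: str):
    """Try the modern token= keyword first, then fall back to use_auth_token=."""
    hf_token = os.getenv("HF_TOKEN")
    try:
        return datasets.load_dataset(dataset_id, token=hf_token)
    except TypeError:
        # Older datasets releases reject token= and expect use_auth_token=
        return datasets.load_dataset(dataset_id, use_auth_token=hf_token)

# Example (placeholder id): ds = load_with_token_fallback("user/some_dataset")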
 
  def scan_shodan(progress=gr.Progress()) -> List[Dict]:
      """

  async def check_ollama_endpoints(instances, progress=gr.Progress()):
      """
+     Efficiently check multiple Ollama endpoints with concurrent processing and comprehensive error handling.

      Args:
          instances: List of Ollama instances from Shodan
+         progress: Gradio progress bar for visual feedback

      Returns:
+         List of Ollama instances with enriched model information
      """
      if not instances:
+         logger.info("No instances to check - skipping endpoint verification")
          return []

+     total_instances = len(instances)
+     logger.info(f"Initiating concurrent validation of {total_instances} Ollama endpoints")
+     progress(0, desc=f"Preparing to check {total_instances} Ollama endpoints")
+
+     # Configure optimized session with connection pooling and timeouts
+     conn = aiohttp.TCPConnector(limit=50, ttl_dns_cache=300)
+     timeout = aiohttp.ClientTimeout(total=30, connect=5, sock_connect=5, sock_read=20)
+
+     async with aiohttp.ClientSession(connector=conn, timeout=timeout) as session:
+         # Create task queue
+         tasks = [check_single_endpoint(session, instance) for instance in instances]

+         # Process with dynamic progress tracking
          updated_instances = []
+         completed = 0
+
+         for future in asyncio.as_completed(tasks):
+             try:
+                 # Process completed task
+                 instance = await future
+                 updated_instances.append(instance)
+
+                 # Update progress with meaningful metrics
+                 completed += 1
+                 progress_pct = completed / total_instances
+                 progress(progress_pct, desc=f"Checked {completed}/{total_instances} endpoints ({progress_pct:.1%})")
+
+                 # Log models found
+                 if instance.get('models'):
+                     logger.info(f"Found {len(instance['models'])} models at {instance['ip']}:{instance['port']}")
+
+             except Exception as task_error:
+                 # Handle per-task errors without stopping the process
+                 logger.warning(f"Endpoint check failed: {task_error}")
+                 # Continue processing remaining endpoints

+     valid_instances = [i for i in updated_instances if i.get('models')]
+     logger.info(f"Endpoint validation complete: {len(valid_instances)}/{total_instances} accessible")
      return updated_instances
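The loop above leans on asyncio.as_completed so results are handled as each probe finishes, which is what lets the progress callback advance independently of slow endpoints. A self-contained sketch of that pattern, with a dummy coroutine standing in for check_single_endpoint (which is unchanged by this commit and not shown in the diff):

import asyncio
import random

async def fake_probe(host: str) -> str:
    # Stand-in for check_single_endpoint: pretend to contact one host
    await asyncio.sleep(random.random())
    return host

async def probe_all(hosts):
    tasks = [fake_probe(h) for h in hosts]
    results, done = [], 0
    for future in asyncio.as_completed(tasks):
        results.append(await future)   # completed in finish order, not submit order
        done += 1
        print(f"checked {done}/{len(tasks)}")  # a progress callback would go here
    return results

# asyncio.run(probe_all(["10.0.0.1", "10.0.0.2", "10.0.0.3"]))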
 
  def update_dataset_with_instances(dataset, instances):
      """
+     Efficiently update HuggingFace dataset with optimized delta synchronization.
+
+     Implements single-pass dataset updates with:
+     1. Optimized in-memory index of existing entries
+     2. Differential detection of new vs. modified instances
+     3. Single hub push with consolidated changes

      Args:
+         dataset: HuggingFace dataset object to update
          instances: List of Ollama instances with model information

      Returns:
+         Updated HuggingFace dataset with synchronized changes
      """
      if not instances:
+         logger.warning("No instance data provided for dataset update operation")
          return dataset

+     start_time = time.time()
+
+     # Optimization: Create indexed lookup of existing instances for O(1) access
+     dataset_dict = {}
+     for idx, item in enumerate(dataset):
+         key = f"{item['ip']}:{item['port']}"
+         dataset_dict[key] = {
+             'idx': idx,
+             'data': item
+         }
+
+     # Track modification metrics
+     stats = {
+         'new': 0,
+         'updated': 0,
+         'unchanged': 0,
+         'models_added': 0
+     }
+
+     # Process differentials
+     update_candidates = []
      new_instances = []

      for instance in instances:
+         # Skip instances without valid IP
+         if not instance.get('ip'):
+             continue
+
          instance_key = f"{instance['ip']}:{instance['port']}"

          if instance_key in dataset_dict:
+             # Existing instance - determine if update needed
+             existing = dataset_dict[instance_key]['data']
+             needs_update = False
+
+             # Check metadata changes
+             for field in ['country', 'region', 'org']:
+                 if instance.get(field) and instance.get(field) != existing.get(field):
+                     needs_update = True

+             # Check model changes - only update if models were found
              if instance.get('models'):
+                 # Compare model signatures to detect changes
+                 existing_models = {model.get('name', ''): model for model in existing.get('models', [])}
+                 new_models = {model.get('name', ''): model for model in instance.get('models', [])}
+
+                 if set(new_models.keys()) != set(existing_models.keys()):
+                     needs_update = True
+                     stats['models_added'] += len(set(new_models.keys()) - set(existing_models.keys()))

+             if needs_update:
+                 # Create updated instance
+                 updated = dict(existing)
+                 updated.update({
+                     'country': instance.get('country', existing.get('country')),
+                     'region': instance.get('region', existing.get('region')),
+                     'org': instance.get('org', existing.get('org')),
+                 })
+
+                 # Only update models if they were found
+                 if instance.get('models'):
+                     updated['models'] = instance['models']
+
+                 update_candidates.append(updated)
+                 stats['updated'] += 1
+             else:
+                 stats['unchanged'] += 1
          else:
+             # New instance
              new_instances.append(instance)
+             stats['new'] += 1

+     # Efficiently construct updated dataset
+     if new_instances or update_candidates:
+         # Start with current dataset
+         current_data = dataset.to_list()
+
+         # Apply updates
+         for updated in update_candidates:
+             instance_key = f"{updated['ip']}:{updated['port']}"
+             idx = dataset_dict[instance_key]['idx']
+             current_data[idx] = updated
+
+         # Add new instances
+         current_data.extend(new_instances)
+
+         # Create updated dataset
+         updated_dataset = datasets.Dataset.from_list(current_data)
+
+         # Push to hub with single operation
+         hf_token = os.getenv("HF_TOKEN")
+         updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
+
+         execution_time = time.time() - start_time
+         logger.info(f"Dataset synchronization complete in {execution_time:.2f}s: {stats['new']} new, {stats['updated']} updated, {stats['unchanged']} unchanged, {stats['models_added']} new models")
+
+         return updated_dataset
+     else:
+         logger.info("No dataset changes detected - skipping hub synchronization")
+         return dataset
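The delta logic above comes down to an index keyed by "ip:port" so each incoming record can be classified as new, updated, or unchanged in one pass before a single push_to_hub call. A toy sketch of that classification step, using plain lists of dicts in place of the HuggingFace dataset (field names mirror the schema used in this file):

def classify(existing_rows, incoming):
    """Split incoming records into new vs. updated relative to existing_rows."""
    index = {f"{row['ip']}:{row['port']}": row for row in existing_rows}
    new, updated = [], []
    for rec in incoming:
        key = f"{rec['ip']}:{rec['port']}"
        if key not in index:
            new.append(rec)
        elif rec.get('models') and rec['models'] != index[key].get('models'):
            updated.append(rec)
    return new, updated

rows = [{"ip": "1.2.3.4", "port": 11434, "models": []}]
incoming = [
    {"ip": "1.2.3.4", "port": 11434, "models": [{"name": "llama3"}]},  # updated
    {"ip": "5.6.7.8", "port": 11434, "models": []},                    # new
]
print(classify(rows, incoming))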
 
  def get_unique_values(dataset):
      """

      return results

  def create_interface():
+     """
+     Create enterprise-grade Gradio interface with optimized data loading and admin authentication.
+
+     Returns:
+         gr.Blocks: Fully configured Gradio interface ready for deployment
+     """
+     # Administrative authentication function
+     def validate_admin():
+         """Check if current user has admin privileges based on API key"""
+         # For production systems, this would use proper authentication
+         # Currently using API key presence as simple auth mechanism
+         admin_key = os.getenv("ADMIN_KEY", "")
+         shodan_key = os.getenv("SHODAN_API_KEY", "")
+         return bool(admin_key and shodan_key)
+
      try:
+         # Initialize critical data structures once at startup
+         logger.info("Initializing application data layer")
          dataset = load_or_create_dataset()

+         # Extract model metadata attributes for filtering
          unique_values = get_unique_values(dataset)
+         logger.info(f"Loaded dataset with {len(unique_values['families'])} model families and {len(unique_values['parameter_sizes'])} parameter sizes")

+         # Preload initial model data
          initial_results = search_models(dataset)
+         logger.info(f"Preloaded {len(initial_results)} models for initial display")
+
+         # Determine administrative access
+         is_admin = validate_admin()
+         admin_status = "enabled" if is_admin else "disabled"
+         logger.info(f"Administrative access: {admin_status}")

+         # Create interface with optimized structure
+         with gr.Blocks(
+             title="Ollama Instance Scanner",
+             theme=gr.themes.Soft(),
+             css=".footer {text-align: center; margin-top: 20px; color: #666;}"
+         ) as interface:
+             # Header section
+             with gr.Row():
+                 with gr.Column():
+                     gr.Markdown("# Ollama Instance Scanner")
+                     gr.Markdown("Browse publicly accessible Ollama models and their capabilities")

+             # Tab container
              with gr.Tabs() as tabs:
+                 # Tab 1: Model Browser (Public)
                  with gr.TabItem("Browse Models"):
                      with gr.Row():
+                         # Filter controls
                          with gr.Column(scale=1):
+                             with gr.Box():
+                                 gr.Markdown("### Search Filters")
+                                 family_dropdown = gr.Dropdown(
+                                     choices=["All"] + unique_values['families'],
+                                     value="All",
+                                     label="Model Family",
+                                     interactive=True
+                                 )
+                                 parameter_size_dropdown = gr.Dropdown(
+                                     choices=["All"] + unique_values['parameter_sizes'],
+                                     value="All",
+                                     label="Parameter Size",
+                                     interactive=True
+                                 )
+                                 name_search = gr.Textbox(
+                                     label="Model Name",
+                                     placeholder="Enter model name...",
+                                     interactive=True
+                                 )
+                                 search_button = gr.Button("Search Models", variant="primary")

+                     # Results section
                      with gr.Row():
+                         # Model results table
                          results_table = gr.DataFrame(
                              value=initial_results,
                              headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
+                             label="Available Models",
+                             interactive=False,
+                             wrap=True
                          )

+                     # Details section
                      with gr.Row():
+                         # Model specifications panel
+                         model_details = gr.JSON(
+                             label="Model Specifications",
+                             visible=True
+                         )

+                 # Tab 2: Shodan Scanner (Admin Only)
+                 with gr.TabItem("Shodan Scan", visible=is_admin):
+                     with gr.Box():
+                         gr.Markdown("## Ollama Instance Scanner")
+                         gr.Markdown("This tool scans for publicly accessible Ollama instances using Shodan API")
+
+                     # Scanner controls
+                     with gr.Row():
+                         shodan_scan_button = gr.Button(
+                             "Start Shodan Scan",
+                             variant="primary",
+                             interactive=is_admin
+                         )
+
+                     # Status display
+                     with gr.Row():
+                         scan_status = gr.Textbox(
+                             label="Scan Status",
+                             value="Ready to scan" if is_admin else "Admin access required",
+                             interactive=False
+                         )
+
+             # Footer
+             gr.Markdown(
+                 "### Ollama Instance Scanner | Powered by Shodan & Hugging Face",
+                 elem_classes=["footer"]
+             )

+             # Define optimized event handlers
              def on_search_click(family, parameter_size, name_search):
+                 """Process model search with optimized filtering"""
+                 try:
+                     # Apply filters
+                     family_filter = None if family == "All" else family
+                     param_size_filter = None if parameter_size == "All" else parameter_size
+                     name_filter = None if not name_search else name_search.strip()
+
+                     # Execute search with admin privileges if available
+                     results = search_models(
+                         dataset,
+                         family_filter,
+                         param_size_filter,
+                         name_filter,
+                         is_admin
+                     )
+
+                     logger.info(f"Search completed: {len(results)} models found matching criteria")
+                     return results
+                 except Exception as search_error:
+                     logger.error(f"Search failed: {search_error}")
+                     # Return empty results on error
+                     return []

              def on_table_select(evt: gr.SelectData, results):
+                 """Handle table row selection with error protection"""
+                 try:
+                     if evt and evt.index and len(results) > evt.index[0]:
+                         selected_row = results[evt.index[0]]
+                         # Extract and return model details
+                         return selected_row.get('full_model_info', "{}")
+                     return "{}"
+                 except Exception as selection_error:
+                     logger.error(f"Selection error: {selection_error}")
+                     return "{}"

              async def run_shodan_scan():
+                 """Execute Shodan scan workflow with comprehensive monitoring"""
+                 if not is_admin:
+                     return "Error: Administrative access required"
+
+                 scan_id = int(time.time()) # Generate unique scan identifier
+                 logger.info(f"Initiating Shodan scan {scan_id}")
+
                  try:
+                     # Phase 1: Shodan API scan
                      instances = scan_shodan()
                      if not instances:
+                         return "Scan complete: No Ollama instances found"

+                     instance_count = len(instances)
+                     logger.info(f"Scan {scan_id}: Found {instance_count} potential Ollama instances")
+
+                     # Phase 2: Endpoint validation
                      updated_instances = await check_ollama_endpoints(instances)
+                     accessible_count = sum(1 for i in updated_instances if i.get('models'))
+                     logger.info(f"Scan {scan_id}: Validated {accessible_count} accessible instances")

+                     # Phase 3: Dataset synchronization
                      nonlocal dataset
                      dataset = update_dataset_with_instances(dataset, updated_instances)

+                     # Phase 4: Interface update
                      nonlocal unique_values
                      unique_values = get_unique_values(dataset)

+                     # Update UI components with new data
                      family_dropdown.choices = ["All"] + unique_values['families']
                      parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']

+                     # Build detailed completion report
+                     report = (
+                         f"Scan {scan_id} completed successfully:\n"
+                         f" {instance_count} total instances discovered\n"
+                         f"• {accessible_count} instances with accessible models\n"
+                         f"• {len(unique_values['families'])} unique model families\n"
+                         f"• {len(unique_values['parameter_sizes'])} parameter size variants"
+                     )
+
+                     logger.info(f"Scan {scan_id} completed successfully")
+                     return report
+
+                 except Exception as scan_error:
+                     logger.error(f"Scan {scan_id} failed: {scan_error}")
+
+                     # Generate actionable error message
+                     if isinstance(scan_error, ValueError) and "API key" in str(scan_error):
+                         return "Error: Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable."
+                     elif isinstance(scan_error, ConnectionError):
+                         return "Error: Network connectivity issue. Please check your internet connection."
+                     else:
+                         return f"Error: Scan operation failed - {str(scan_error)}"

+             # Connect event handlers to UI components
              search_button.click(
+                 fn=on_search_click,
                  inputs=[family_dropdown, parameter_size_dropdown, name_search],
                  outputs=[results_table]
              )

              results_table.select(
+                 fn=on_table_select,
                  inputs=[results_table],
                  outputs=[model_details]
              )

              shodan_scan_button.click(
+                 fn=run_shodan_scan,
                  inputs=[],
                  outputs=[scan_status]
              )

+             logger.info("Gradio interface successfully initialized")
              return interface

+     except Exception as interface_error:
+         logger.critical(f"Interface initialization failed: {interface_error}")
+         raise ValueError(f"Failed to create application interface: {interface_error}") from interface_error
+
+ def validate_env_variables():
+     """
+     Centralized validation of critical environment variables with precise error messaging.
+
+     Raises:
+         ValueError: When any required environment variable is missing
+     """
+     required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
+     missing_vars = [var for var in required_vars if not os.getenv(var)]
+
+     if missing_vars:
+         error_msg = f"Missing critical environment variables: {', '.join(missing_vars)}"
+         logger.critical(error_msg)
+         raise ValueError(error_msg)
+
+     # Validate token quality
+     hf_token = os.getenv("HF_TOKEN")
+     if len(hf_token) < 8: # Minimum length for plausible token
+         logger.warning("HF_TOKEN appears malformed (insufficient length)")
+
+     logger.info("Environment validation successful - all required variables present")

  def main():
+     """
+     Application entry point with centralized error handling and environment validation.
+     """
      try:
+         # Validate environment once at startup
+         validate_env_variables()
+
+         # Initialize and launch interface
+         logger.info("Initializing Gradio interface")
          interface = create_interface()
+
          if interface:
+             logger.info("Starting Gradio server")
              interface.launch()
          else:
+             logger.critical("Interface initialization failed")
+             sys.exit(1)
+
+     except ValueError as config_error:
+         # Handle configuration errors
+         logger.critical(f"Configuration error: {config_error}")
+         sys.exit(1)
+
+     except Exception as fatal_error:
+         # Handle unexpected errors
+         logger.critical(f"Fatal application error: {fatal_error}")
+         sys.exit(1)

  if __name__ == "__main__":
      main()
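For a local run, main() above validates SHODAN_API_KEY and HF_TOKEN before anything else executes. A minimal launcher sketch with placeholder values (the variable names come from validate_env_variables above; the module name assumes the file is saved as app.py, per this commit):

import os

# Placeholders only - substitute real credentials before launching.
os.environ.setdefault("SHODAN_API_KEY", "<shodan-api-key>")
os.environ.setdefault("HF_TOKEN", "<huggingface-token>")

from app import main  # assumes app.py is importable from the working directory

if __name__ == "__main__":
    main()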