Update app.py
app.py
CHANGED
--- app.py (old)
@@ -1,4 +1,6 @@
 import os
 import logging
 import datasets
 import shodan
@@ -6,39 +8,64 @@ import asyncio
 import aiohttp
 import json
 import gradio as gr
-from typing import List, Dict, Any, Optional
 
-# Configure logging
-logging.basicConfig(
 logger = logging.getLogger(__name__)
 
-def validate_env_variables():
-    """Validate that required environment variables are set."""
-    required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
-    missing_vars = [var for var in required_vars if not os.getenv(var)]
-    if missing_vars:
-        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
-
 def load_or_create_dataset():
-    """
-
     hf_token = os.getenv("HF_TOKEN")
 
     try:
-
-
-
-
-
-
-
     else:
-        #
-        return dataset
 
     except FileNotFoundError:
-        logger.info("Dataset not found
-
         empty_dataset = datasets.Dataset.from_dict({
             "ip": [],
             "port": [],
@@ -48,26 +75,38 @@ def load_or_create_dataset():
             "models": []
         })
 
-
-
-
-
-        )
-
-        # Load the newly created dataset
-        dataset = datasets.load_dataset(
-            "latterworks/llama_checker_results",
-            use_auth_token=hf_token
-        )
-
-        if "train" in dataset:
-            return dataset["train"]
-        else:
-            return dataset[next(iter(dataset))]
 
-
-
-
 
 def scan_shodan(progress=gr.Progress()) -> List[Dict]:
     """
@@ -179,89 +218,179 @@ async def check_single_endpoint(session, instance):
 
 async def check_ollama_endpoints(instances, progress=gr.Progress()):
     """
-
 
     Args:
         instances: List of Ollama instances from Shodan
-        progress: Gradio progress bar
 
     Returns:
-        List of Ollama instances with model information
     """
     if not instances:
         return []
-
-    progress(0, desc="Checking Ollama endpoints")
 
-
-
-
-
-
-
 
-    # Process
     updated_instances = []
-
-
-
-
 
     return updated_instances
 
 def update_dataset_with_instances(dataset, instances):
     """
-
 
     Args:
-        dataset: HuggingFace dataset
         instances: List of Ollama instances with model information
 
     Returns:
-        Updated HuggingFace dataset
     """
     if not instances:
-        logger.warning("No
         return dataset
-
-    # Convert dataset to list of dictionaries for easier manipulation
-    dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset.to_list()}
 
-
-
     new_instances = []
 
     for instance in instances:
         instance_key = f"{instance['ip']}:{instance['port']}"
 
         if instance_key in dataset_dict:
-            #
-
-
-
 
-            #
             if instance.get('models'):
-
 
-
         else:
-            #
             new_instances.append(instance)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
 
 def get_unique_values(dataset):
     """
@@ -377,150 +506,296 @@ def search_models(dataset, family=None, parameter_size=None, name_search=None, i
     return results
 
 def create_interface():
-    """
     try:
-        #
         dataset = load_or_create_dataset()
 
-        #
         unique_values = get_unique_values(dataset)
 
-        #
         initial_results = search_models(dataset)
 
-        # Create
-        with gr.Blocks(
-
-            gr.
 
         with gr.Tabs() as tabs:
-            #
             with gr.TabItem("Browse Models"):
                 with gr.Row():
                     with gr.Column(scale=1):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
                     with gr.Row():
                         results_table = gr.DataFrame(
                             value=initial_results,
                             headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
-                            label="
                         )
 
                     with gr.Row():
-
 
-            # Shodan
-            with gr.TabItem("Shodan Scan
-                gr.
-
-
-
-
 
-            # Define event handlers
             def on_search_click(family, parameter_size, name_search):
-
-
-
-
-
-
-
-
-
-
-
-
-
 
             def on_table_select(evt: gr.SelectData, results):
-
-
-
-
 
             async def run_shodan_scan():
                 try:
-                    #
-                    if not os.getenv("SHODAN_API_KEY"):
-                        return "Error: SHODAN_API_KEY environment variable is not set."
-
-                    # Perform Shodan scan
                     instances = scan_shodan()
-
                     if not instances:
-                        return "No Ollama instances found
 
-
                     updated_instances = await check_ollama_endpoints(instances)
 
-                    #
                     nonlocal dataset
                     dataset = update_dataset_with_instances(dataset, updated_instances)
 
-                    #
                     nonlocal unique_values
                     unique_values = get_unique_values(dataset)
 
-                    # Update
                     family_dropdown.choices = ["All"] + unique_values['families']
                     parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']
 
-
-
-
-
 
-            # Connect event handlers
             search_button.click(
-                on_search_click,
                 inputs=[family_dropdown, parameter_size_dropdown, name_search],
                 outputs=[results_table]
             )
 
             results_table.select(
-                on_table_select,
                 inputs=[results_table],
                 outputs=[model_details]
             )
 
             shodan_scan_button.click(
-                run_shodan_scan,
                 inputs=[],
                 outputs=[scan_status]
             )
 
             return interface
 
-    except Exception as
-        logger.
-        raise
 
 def main():
-    """
     try:
         interface = create_interface()
         if interface:
             interface.launch()
         else:
-            logger.
-
-
 
 if __name__ == "__main__":
     main()
+++ app.py (new)
@@ -1,4 +1,6 @@
 import os
+import sys
+import time
 import logging
 import datasets
 import shodan
@@ -6,39 +8,64 @@ import asyncio
 import aiohttp
 import json
 import gradio as gr
+from typing import List, Dict, Any, Optional, Tuple, Set, Union
+from concurrent.futures import ThreadPoolExecutor
 
+# Configure production-grade logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler("ollama_scanner.log")
+    ]
+)
 logger = logging.getLogger(__name__)
 
 def load_or_create_dataset():
+    """
+    Load dataset from HuggingFace with optimized error handling and authentication.
+
+    Returns:
+        Dataset: The loaded dataset object ready for query operations
+
+    Raises:
+        ValueError: When authentication fails or dataset structure is invalid
+        ConnectionError: When network issues prevent dataset access
+    """
+    # HF token must exist for private dataset access
     hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN environment variable missing or empty - authentication required")
+
+    dataset_id = "latterworks/llama_checker_results"
+    logger.info(f"Initializing dataset access: {dataset_id}")
 
     try:
+        # First attempt: Try modern token parameter
+        try:
+            dataset = datasets.load_dataset(dataset_id, token=hf_token)
+        except TypeError:
+            # Fallback: Use legacy authentication parameter
+            logger.info("Attempting legacy authentication method")
+            dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+        # Extract the appropriate split
+        if isinstance(dataset, datasets.DatasetDict):
+            if "train" in dataset:
+                return dataset["train"]
+            # No train split found, use first available
+            first_split = next(iter(dataset))
+            logger.info(f"No 'train' split found, using '{first_split}' split")
+            return dataset[first_split]
         else:
+            # Handle direct Dataset object (no splits)
+            return dataset
 
     except FileNotFoundError:
+        logger.info(f"Dataset {dataset_id} not found - creating new dataset")
+
+        # Prepare empty dataset with precise schema
         empty_dataset = datasets.Dataset.from_dict({
             "ip": [],
             "port": [],
@@ -48,26 +75,38 @@ def load_or_create_dataset():
             "models": []
         })
 
+        try:
+            # Create dataset on Hub with correct token parameter
+            empty_dataset.push_to_hub(dataset_id, token=hf_token)
+            logger.info(f"Successfully created empty dataset: {dataset_id}")
 
+            # Load the newly created dataset
+            try:
+                dataset = datasets.load_dataset(dataset_id, token=hf_token)
+            except TypeError:
+                dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+            # Extract appropriate split
+            if isinstance(dataset, datasets.DatasetDict):
+                if "train" in dataset:
+                    return dataset["train"]
+                first_split = next(iter(dataset))
+                logger.info(f"Using '{first_split}' split from newly created dataset")
+                return dataset[first_split]
+            else:
+                return dataset
+
+        except Exception as creation_error:
+            logger.error(f"Dataset creation failed: {creation_error}")
+            raise ValueError(f"Failed to create dataset: {creation_error}") from creation_error
+
+    except (ConnectionError, TimeoutError) as network_error:
+        logger.error(f"Network error accessing dataset: {network_error}")
+        raise ConnectionError(f"Network failure accessing HuggingFace Hub: {network_error}") from network_error
+
+    except Exception as general_error:
+        logger.error(f"Unexpected error accessing dataset: {general_error}")
+        raise ValueError(f"Dataset access failed: {general_error}") from general_error
 
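A note on the authentication fallback above: newer releases of the `datasets` library accept a `token` argument, while older ones only accept `use_auth_token` and raise TypeError when given `token`. A minimal standalone sketch of the same pattern (the dataset id in the usage comment is a placeholder):

import datasets

def load_private_dataset(dataset_id: str, hf_token: str):
    # Newer datasets versions take `token`; older ones only `use_auth_token`.
    try:
        return datasets.load_dataset(dataset_id, token=hf_token)
    except TypeError:
        return datasets.load_dataset(dataset_id, use_auth_token=hf_token)

# load_private_dataset("your-org/your-dataset", os.environ["HF_TOKEN"])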
 def scan_shodan(progress=gr.Progress()) -> List[Dict]:
     """
@@ -179,89 +218,179 @@ async def check_single_endpoint(session, instance):
 
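scan_shodan's body is collapsed in this view, so the following is only a hedged sketch of what a Shodan query for Ollama hosts typically looks like with the official client; the query string and field mapping are assumptions, not taken from app.py:

import os
import shodan

def find_ollama_hosts(limit: int = 100) -> list:
    api = shodan.Shodan(os.environ["SHODAN_API_KEY"])
    results = api.search('product:"Ollama"', limit=limit)  # assumed query string
    return [
        {"ip": match["ip_str"], "port": match["port"]}  # standard Shodan match fields
        for match in results.get("matches", [])
    ]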
 async def check_ollama_endpoints(instances, progress=gr.Progress()):
     """
+    Efficiently check multiple Ollama endpoints with concurrent processing and comprehensive error handling.
 
     Args:
         instances: List of Ollama instances from Shodan
+        progress: Gradio progress bar for visual feedback
 
     Returns:
+        List of Ollama instances with enriched model information
     """
     if not instances:
+        logger.info("No instances to check - skipping endpoint verification")
         return []
 
+    total_instances = len(instances)
+    logger.info(f"Initiating concurrent validation of {total_instances} Ollama endpoints")
+    progress(0, desc=f"Preparing to check {total_instances} Ollama endpoints")
+
+    # Configure optimized session with connection pooling and timeouts
+    conn = aiohttp.TCPConnector(limit=50, ttl_dns_cache=300)
+    timeout = aiohttp.ClientTimeout(total=30, connect=5, sock_connect=5, sock_read=20)
+
+    async with aiohttp.ClientSession(connector=conn, timeout=timeout) as session:
+        # Create task queue
+        tasks = [check_single_endpoint(session, instance) for instance in instances]
 
+        # Process with dynamic progress tracking
         updated_instances = []
+        completed = 0
+
+        for future in asyncio.as_completed(tasks):
+            try:
+                # Process completed task
+                instance = await future
+                updated_instances.append(instance)
+
+                # Update progress with meaningful metrics
+                completed += 1
+                progress_pct = completed / total_instances
+                progress(progress_pct, desc=f"Checked {completed}/{total_instances} endpoints ({progress_pct:.1%})")
+
+                # Log models found
+                if instance.get('models'):
+                    logger.info(f"Found {len(instance['models'])} models at {instance['ip']}:{instance['port']}")
+
+            except Exception as task_error:
+                # Handle per-task errors without stopping the process
+                logger.warning(f"Endpoint check failed: {task_error}")
+                # Continue processing remaining endpoints
 
+    valid_instances = [i for i in updated_instances if i.get('models')]
+    logger.info(f"Endpoint validation complete: {len(valid_instances)}/{total_instances} accessible")
     return updated_instances
 
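One behavioral detail of the loop above worth noting: asyncio.as_completed yields futures in completion order, so updated_instances ends up ordered by finish time rather than by input order. A self-contained sketch of the same progress-tracking pattern, with a stand-in work coroutine:

import asyncio

async def work(n: int) -> int:
    await asyncio.sleep(0.01 * (n % 3))  # simulate variable endpoint latency
    return n * n

async def run_all(count: int) -> list:
    tasks = [work(n) for n in range(count)]
    results, completed = [], 0
    for future in asyncio.as_completed(tasks):
        results.append(await future)  # arrives in completion order
        completed += 1
        print(f"checked {completed}/{count} ({completed / count:.1%})")
    return results

# asyncio.run(run_all(10))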
 def update_dataset_with_instances(dataset, instances):
     """
+    Efficiently update HuggingFace dataset with optimized delta synchronization.
+
+    Implements single-pass dataset updates with:
+    1. Optimized in-memory index of existing entries
+    2. Differential detection of new vs. modified instances
+    3. Single hub push with consolidated changes
 
     Args:
+        dataset: HuggingFace dataset object to update
         instances: List of Ollama instances with model information
 
     Returns:
+        Updated HuggingFace dataset with synchronized changes
     """
     if not instances:
+        logger.warning("No instance data provided for dataset update operation")
         return dataset
 
+    start_time = time.time()
+
+    # Optimization: Create indexed lookup of existing instances for O(1) access
+    dataset_dict = {}
+    for idx, item in enumerate(dataset):
+        key = f"{item['ip']}:{item['port']}"
+        dataset_dict[key] = {
+            'idx': idx,
+            'data': item
+        }
+
+    # Track modification metrics
+    stats = {
+        'new': 0,
+        'updated': 0,
+        'unchanged': 0,
+        'models_added': 0
+    }
+
+    # Process differentials
+    update_candidates = []
     new_instances = []
 
     for instance in instances:
+        # Skip instances without valid IP
+        if not instance.get('ip'):
+            continue
+
         instance_key = f"{instance['ip']}:{instance['port']}"
 
         if instance_key in dataset_dict:
+            # Existing instance - determine if update needed
+            existing = dataset_dict[instance_key]['data']
+            needs_update = False
+
+            # Check metadata changes
+            for field in ['country', 'region', 'org']:
+                if instance.get(field) and instance.get(field) != existing.get(field):
+                    needs_update = True
 
+            # Check model changes - only update if models were found
             if instance.get('models'):
+                # Compare model signatures to detect changes
+                existing_models = {model.get('name', ''): model for model in existing.get('models', [])}
+                new_models = {model.get('name', ''): model for model in instance.get('models', [])}
+
+                if set(new_models.keys()) != set(existing_models.keys()):
+                    needs_update = True
+                    stats['models_added'] += len(set(new_models.keys()) - set(existing_models.keys()))
 
+            if needs_update:
+                # Create updated instance
+                updated = dict(existing)
+                updated.update({
+                    'country': instance.get('country', existing.get('country')),
+                    'region': instance.get('region', existing.get('region')),
+                    'org': instance.get('org', existing.get('org')),
+                })
+
+                # Only update models if they were found
+                if instance.get('models'):
+                    updated['models'] = instance['models']
+
+                update_candidates.append(updated)
+                stats['updated'] += 1
+            else:
+                stats['unchanged'] += 1
         else:
+            # New instance
             new_instances.append(instance)
+            stats['new'] += 1
 
+    # Efficiently construct updated dataset
+    if new_instances or update_candidates:
+        # Start with current dataset
+        current_data = dataset.to_list()
+
+        # Apply updates
+        for updated in update_candidates:
+            instance_key = f"{updated['ip']}:{updated['port']}"
+            idx = dataset_dict[instance_key]['idx']
+            current_data[idx] = updated
+
+        # Add new instances
+        current_data.extend(new_instances)
+
+        # Create updated dataset
+        updated_dataset = datasets.Dataset.from_list(current_data)
+
+        # Push to hub with single operation
+        hf_token = os.getenv("HF_TOKEN")
+        updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
+
+        execution_time = time.time() - start_time
+        logger.info(f"Dataset synchronization complete in {execution_time:.2f}s: {stats['new']} new, {stats['updated']} updated, {stats['unchanged']} unchanged, {stats['models_added']} new models")
+
+        return updated_dataset
+    else:
+        logger.info("No dataset changes detected - skipping hub synchronization")
+        return dataset
 
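Stripped of the datasets plumbing, the delta logic above reduces to: index existing rows by an "ip:port" key, rewrite rows whose metadata or model list changed, and append unseen endpoints. A toy version of that merge (plain dicts, no Hub push):

def merge_rows(existing: list, incoming: list) -> list:
    # O(1) lookup of current rows by endpoint key
    index = {f"{row['ip']}:{row['port']}": i for i, row in enumerate(existing)}
    merged = list(existing)
    for row in incoming:
        key = f"{row['ip']}:{row['port']}"
        if key in index:
            merged[index[key]] = {**merged[index[key]], **row}  # overwrite changed fields
        else:
            merged.append(row)  # previously unseen endpoint
    return merged

# merge_rows([{"ip": "1.2.3.4", "port": 11434, "org": "old"}],
#            [{"ip": "1.2.3.4", "port": 11434, "org": "new"}])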
 def get_unique_values(dataset):
     """
@@ -377,150 +506,296 @@ def search_models(dataset, family=None, parameter_size=None, name_search=None, i
     return results
 
 def create_interface():
+    """
+    Create enterprise-grade Gradio interface with optimized data loading and admin authentication.
+
+    Returns:
+        gr.Blocks: Fully configured Gradio interface ready for deployment
+    """
+    # Administrative authentication function
+    def validate_admin():
+        """Check if current user has admin privileges based on API key"""
+        # For production systems, this would use proper authentication
+        # Currently using API key presence as simple auth mechanism
+        admin_key = os.getenv("ADMIN_KEY", "")
+        shodan_key = os.getenv("SHODAN_API_KEY", "")
+        return bool(admin_key and shodan_key)
+
     try:
+        # Initialize critical data structures once at startup
+        logger.info("Initializing application data layer")
         dataset = load_or_create_dataset()
 
+        # Extract model metadata attributes for filtering
         unique_values = get_unique_values(dataset)
+        logger.info(f"Loaded dataset with {len(unique_values['families'])} model families and {len(unique_values['parameter_sizes'])} parameter sizes")
 
+        # Preload initial model data
         initial_results = search_models(dataset)
+        logger.info(f"Preloaded {len(initial_results)} models for initial display")
+
+        # Determine administrative access
+        is_admin = validate_admin()
+        admin_status = "enabled" if is_admin else "disabled"
+        logger.info(f"Administrative access: {admin_status}")
 
+        # Create interface with optimized structure
+        with gr.Blocks(
+            title="Ollama Instance Scanner",
+            theme=gr.themes.Soft(),
+            css=".footer {text-align: center; margin-top: 20px; color: #666;}"
+        ) as interface:
+            # Header section
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("# Ollama Instance Scanner")
+                    gr.Markdown("Browse publicly accessible Ollama models and their capabilities")
 
+            # Tab container
             with gr.Tabs() as tabs:
+                # Tab 1: Model Browser (Public)
                 with gr.TabItem("Browse Models"):
                     with gr.Row():
+                        # Filter controls
                        with gr.Column(scale=1):
+                            with gr.Box():
+                                gr.Markdown("### Search Filters")
+                                family_dropdown = gr.Dropdown(
+                                    choices=["All"] + unique_values['families'],
+                                    value="All",
+                                    label="Model Family",
+                                    interactive=True
+                                )
+                                parameter_size_dropdown = gr.Dropdown(
+                                    choices=["All"] + unique_values['parameter_sizes'],
+                                    value="All",
+                                    label="Parameter Size",
+                                    interactive=True
+                                )
+                                name_search = gr.Textbox(
+                                    label="Model Name",
+                                    placeholder="Enter model name...",
+                                    interactive=True
+                                )
+                                search_button = gr.Button("Search Models", variant="primary")
 
+                    # Results section
                     with gr.Row():
+                        # Model results table
                         results_table = gr.DataFrame(
                             value=initial_results,
                             headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
+                            label="Available Models",
+                            interactive=False,
+                            wrap=True
                         )
 
+                    # Details section
                     with gr.Row():
+                        # Model specifications panel
+                        model_details = gr.JSON(
+                            label="Model Specifications",
+                            visible=True
+                        )
 
+                # Tab 2: Shodan Scanner (Admin Only)
+                with gr.TabItem("Shodan Scan", visible=is_admin):
+                    with gr.Box():
+                        gr.Markdown("## Ollama Instance Scanner")
+                        gr.Markdown("This tool scans for publicly accessible Ollama instances using Shodan API")
+
+                        # Scanner controls
+                        with gr.Row():
+                            shodan_scan_button = gr.Button(
+                                "Start Shodan Scan",
+                                variant="primary",
+                                interactive=is_admin
+                            )
+
+                        # Status display
+                        with gr.Row():
+                            scan_status = gr.Textbox(
+                                label="Scan Status",
+                                value="Ready to scan" if is_admin else "Admin access required",
+                                interactive=False
+                            )
+
+            # Footer
+            gr.Markdown(
+                "### Ollama Instance Scanner | Powered by Shodan & Hugging Face",
+                elem_classes=["footer"]
+            )
 
+            # Define optimized event handlers
             def on_search_click(family, parameter_size, name_search):
+                """Process model search with optimized filtering"""
+                try:
+                    # Apply filters
+                    family_filter = None if family == "All" else family
+                    param_size_filter = None if parameter_size == "All" else parameter_size
+                    name_filter = None if not name_search else name_search.strip()
+
+                    # Execute search with admin privileges if available
+                    results = search_models(
+                        dataset,
+                        family_filter,
+                        param_size_filter,
+                        name_filter,
+                        is_admin
+                    )
+
+                    logger.info(f"Search completed: {len(results)} models found matching criteria")
+                    return results
+                except Exception as search_error:
+                    logger.error(f"Search failed: {search_error}")
+                    # Return empty results on error
+                    return []
 
             def on_table_select(evt: gr.SelectData, results):
+                """Handle table row selection with error protection"""
+                try:
+                    if evt and evt.index and len(results) > evt.index[0]:
+                        selected_row = results[evt.index[0]]
+                        # Extract and return model details
+                        return selected_row.get('full_model_info', "{}")
+                    return "{}"
+                except Exception as selection_error:
+                    logger.error(f"Selection error: {selection_error}")
+                    return "{}"
 
             async def run_shodan_scan():
+                """Execute Shodan scan workflow with comprehensive monitoring"""
+                if not is_admin:
+                    return "Error: Administrative access required"
+
+                scan_id = int(time.time())  # Generate unique scan identifier
+                logger.info(f"Initiating Shodan scan {scan_id}")
+
                 try:
+                    # Phase 1: Shodan API scan
                     instances = scan_shodan()
                     if not instances:
+                        return "Scan complete: No Ollama instances found"
 
+                    instance_count = len(instances)
+                    logger.info(f"Scan {scan_id}: Found {instance_count} potential Ollama instances")
+
+                    # Phase 2: Endpoint validation
                     updated_instances = await check_ollama_endpoints(instances)
+                    accessible_count = sum(1 for i in updated_instances if i.get('models'))
+                    logger.info(f"Scan {scan_id}: Validated {accessible_count} accessible instances")
 
+                    # Phase 3: Dataset synchronization
                     nonlocal dataset
                     dataset = update_dataset_with_instances(dataset, updated_instances)
 
+                    # Phase 4: Interface update
                     nonlocal unique_values
                     unique_values = get_unique_values(dataset)
 
+                    # Update UI components with new data
                     family_dropdown.choices = ["All"] + unique_values['families']
                     parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']
 
+                    # Build detailed completion report
+                    report = (
+                        f"Scan {scan_id} completed successfully:\n"
+                        f"• {instance_count} total instances discovered\n"
+                        f"• {accessible_count} instances with accessible models\n"
+                        f"• {len(unique_values['families'])} unique model families\n"
+                        f"• {len(unique_values['parameter_sizes'])} parameter size variants"
+                    )
+
+                    logger.info(f"Scan {scan_id} completed successfully")
+                    return report
+
+                except Exception as scan_error:
+                    logger.error(f"Scan {scan_id} failed: {scan_error}")
+
+                    # Generate actionable error message
+                    if isinstance(scan_error, ValueError) and "API key" in str(scan_error):
+                        return "Error: Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable."
+                    elif isinstance(scan_error, ConnectionError):
+                        return "Error: Network connectivity issue. Please check your internet connection."
+                    else:
+                        return f"Error: Scan operation failed - {str(scan_error)}"
 
+            # Connect event handlers to UI components
             search_button.click(
+                fn=on_search_click,
                 inputs=[family_dropdown, parameter_size_dropdown, name_search],
                 outputs=[results_table]
             )
 
             results_table.select(
+                fn=on_table_select,
                 inputs=[results_table],
                 outputs=[model_details]
             )
 
             shodan_scan_button.click(
+                fn=run_shodan_scan,
                 inputs=[],
                 outputs=[scan_status]
             )
 
+        logger.info("Gradio interface successfully initialized")
         return interface
 
+    except Exception as interface_error:
+        logger.critical(f"Interface initialization failed: {interface_error}")
+        raise ValueError(f"Failed to create application interface: {interface_error}") from interface_error
+
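The handler wiring above follows Gradio's standard component.click(fn=..., inputs=[...], outputs=[...]) pattern; a bare-bones sketch of the same wiring is below. One caveat: assigning to family_dropdown.choices inside run_shodan_scan mutates the server-side component object, but a live Gradio page generally only reflects choice changes that are returned from an event handler (for example via gr.update(choices=...)).

import gradio as gr

def greet(name: str) -> str:
    return f"Hello, {name or 'stranger'}!"

with gr.Blocks(title="Wiring demo") as demo:
    name_box = gr.Textbox(label="Name")
    greet_button = gr.Button("Greet", variant="primary")
    output_box = gr.Textbox(label="Greeting", interactive=False)
    # Same click wiring used for the search and scan buttons above
    greet_button.click(fn=greet, inputs=[name_box], outputs=[output_box])

# demo.launch()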
+def validate_env_variables():
+    """
+    Centralized validation of critical environment variables with precise error messaging.
+
+    Raises:
+        ValueError: When any required environment variable is missing
+    """
+    required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
+    missing_vars = [var for var in required_vars if not os.getenv(var)]
+
+    if missing_vars:
+        error_msg = f"Missing critical environment variables: {', '.join(missing_vars)}"
+        logger.critical(error_msg)
+        raise ValueError(error_msg)
+
+    # Validate token quality
+    hf_token = os.getenv("HF_TOKEN")
+    if len(hf_token) < 8:  # Minimum length for plausible token
+        logger.warning("HF_TOKEN appears malformed (insufficient length)")
+
+    logger.info("Environment validation successful - all required variables present")
 
 def main():
+    """
+    Application entry point with centralized error handling and environment validation.
+    """
     try:
+        # Validate environment once at startup
+        validate_env_variables()
+
+        # Initialize and launch interface
+        logger.info("Initializing Gradio interface")
         interface = create_interface()
+
         if interface:
+            logger.info("Starting Gradio server")
             interface.launch()
         else:
+            logger.critical("Interface initialization failed")
+            sys.exit(1)
+
+    except ValueError as config_error:
+        # Handle configuration errors
+        logger.critical(f"Configuration error: {config_error}")
+        sys.exit(1)
+
+    except Exception as fatal_error:
+        # Handle unexpected errors
+        logger.critical(f"Fatal application error: {fatal_error}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     main()
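A quick way to see the fail-fast behavior of validate_env_variables, assuming this file is saved as app.py and importable (a hypothetical smoke test, not part of the commit):

import os
import app  # assumes this module is app.py

os.environ.pop("SHODAN_API_KEY", None)
try:
    app.validate_env_variables()
except ValueError as err:
    print(err)  # e.g. "Missing critical environment variables: SHODAN_API_KEY"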
|