Spaces:

latterworks
/

agen

Sleeping

App Files Files Community

latterworks committed on Mar 19

Commit

44d06c0

verified ·

1 Parent(s): 059111e

Update app.py

Browse files

Files changed (1) hide show

app.py +485 -687

app.py CHANGED Viewed

@@ -1,766 +1,564 @@
-# app.py - Ollama Scanner Application
-import asyncio
-import bcrypt
-import gradio as gr
-import logging
 import os
 import requests
-import shodan
-import time
-from datasets import load_dataset, Dataset
 from huggingface_hub import HfApi, login
-import os
-from typing import List, Dict, Any, Optional, Tuple
-import pandas as pd
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[logging.StreamHandler()]
 )
-logger = logging.getLogger("ollama_scanner")
-# Dataset constants
-DATASET_NAME = "latterworks/llama_checker_results"
-DATASET_COLUMNS = ["ip", "port", "country", "region", "org", "models"]
-# Security helper functions
-def verify_password(entered_password: str, stored_password_hash: str) -> bool:
-    """Verify a password against its hash using bcrypt."""
     try:
-        # Verify password
-        return bcrypt.checkpw(
-            entered_password.encode('utf-8'),
-            stored_password_hash.encode('utf-8')
-        )
     except Exception as e:
-        logger.exception("Password verification error:")
-        return False
-def hash_password(password: str) -> str:
-    """Hash a password using bcrypt."""
-    return bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt()).decode('utf-8')
-# Dataset helper functions
-def get_dataset() -> Dataset:
-    """Get the dataset with Ollama instances and models."""
     try:
-        # Try to load the existing dataset
-        hf_token = os.getenv("HF_TOKEN")
-        dataset = load_dataset(DATASET_NAME, use_auth_token=hf_token)
-        logger.info(f"Loaded existing dataset: {DATASET_NAME}")
         return dataset["train"]
     except Exception as e:
-        logger.warning(f"Could not load existing dataset: {e}")
-        # Create a new empty dataset
-        empty_data = {
-            'ip': [],
-            'port': [],
-            'country': [],
-            'region': [],
-            'org': [],
-            'models': []
-        }
-        dataset = Dataset.from_dict(empty_data)
-        logger.info(f"Created new empty dataset")
-        return dataset
-def push_dataset(dataset: Dataset) -> None:
-    """Push the dataset to the Hub."""
-    try:
-        hf_token = os.getenv("HF_TOKEN")
-        dataset.push_to_hub(DATASET_NAME, token=hf_token)
-        logger.info(f"Pushed dataset to Hub: {DATASET_NAME}")
-    except Exception as e:
-        logger.error(f"Failed to push dataset to Hub: {e}")
-def find_instance_index(dataset: Dataset, ip: str, port: int) -> Optional[int]:
-    """Find the index of an instance in the dataset."""
-    for i, item in enumerate(dataset):
-        if item['ip'] == ip and item['port'] == port:
-            return i
-    return None
-def add_or_update_instance(dataset: Dataset, instance_data: Dict[str, Any],
-                           update_models: bool = False) -> Dataset:
-    """Add a new instance or update an existing one in the dataset."""
-    ip = instance_data['ip']
-    port = instance_data['port']
-    # Convert to dictionaries for manipulation
-    items = [dict(item) for item in dataset]
-    # Check if instance exists
-    instance_idx = find_instance_index(dataset, ip, port)
-    if instance_idx is not None:
-        # Update existing instance
-        if update_models or 'models' not in instance_data:
-            # Keep existing models if not updating models or models not provided
-            instance_data['models'] = items[instance_idx].get('models', [])
-        items[instance_idx] = instance_data
     else:
-        # Add new instance
-        if 'models' not in instance_data:
-            instance_data['models'] = []
-        items.append(instance_data)
-    # Convert back to dataset
-    new_dataset = Dataset.from_dict({
-        'ip': [item['ip'] for item in items],
-        'port': [item['port'] for item in items],
-        'country': [item.get('country', '') for item in items],
-        'region': [item.get('region', '') for item in items],
-        'org': [item.get('org', '') for item in items],
-        'models': [item.get('models', []) for item in items]
-    })
-    return new_dataset
-def update_instance_models(dataset: Dataset, ip: str, port: int,
-                          models: List[Dict[str, Any]]) -> Dataset:
-    """Update the models for an existing instance in the dataset."""
-    instance_idx = find_instance_index(dataset, ip, port)
-    if instance_idx is None:
-        logger.error(f"Instance {ip}:{port} not found in dataset")
-        return dataset
-    # Convert to dictionaries for manipulation
-    items = [dict(item) for item in dataset]
-    # Update models
-    items[instance_idx]['models'] = models
-    # Convert back to dataset
-    new_dataset = Dataset.from_dict({
-        'ip': [item['ip'] for item in items],
-        'port': [item['port'] for item in items],
-        'country': [item.get('country', '') for item in items],
-        'region': [item.get('region', '') for item in items],
-        'org': [item.get('org', '') for item in items],
-        'models': [item.get('models', []) for item in items]
-    })
-    return new_dataset
-# Ollama and Shodan functions
-async def check_ollama_endpoint(dataset: Dataset, ip: str, port: int) -> Dataset:
-    """Check an Ollama endpoint and update the dataset with model information."""
     url = f"http://{ip}:{port}/api/tags"
     try:
-        # Send request to Ollama API
-        response = requests.get(url, timeout=5)
         response.raise_for_status()
-        # Parse response
         data = response.json()
-        # Extract models
-        models = []
-        for model_data in data.get('models', []):
-            model = {
-                'name': model_data.get('name', ''),
-                'family': model_data.get('details', {}).get('family', ''),
-                'parameter_size': model_data.get('details', {}).get('parameter_size', ''),
-                'quantization_level': model_data.get('details', {}).get('quantization_level', ''),
-                'digest': model_data.get('digest', ''),
-                'modified_at': model_data.get('modified_at', ''),
-                'size': model_data.get('size', 0)
-            }
-            models.append(model)
-        # Update dataset
-        updated_dataset = update_instance_models(dataset, ip, port, models)
-        logger.info(f"Updated models for {ip}:{port} - found {len(models)} models")
-        return updated_dataset
     except requests.exceptions.RequestException as e:
-        logger.error(f"Network error checking {ip}:{port}: {e}")
-        # Update with empty models list to indicate connection failed
-        updated_dataset = update_instance_models(dataset, ip, port, [])
-        return updated_dataset
     except ValueError as e:
         logger.error(f"Invalid JSON from {ip}:{port}: {e}")
-        # Update with empty models list to indicate invalid JSON
-        updated_dataset = update_instance_models(dataset, ip, port, [])
-        return updated_dataset
     except Exception as e:
-        logger.exception(f"Unexpected error checking {ip}:{port}:")
-        # Update with empty models list to indicate unexpected error
-        updated_dataset = update_instance_models(dataset, ip, port, [])
-        return updated_dataset
-async def scan_ollama_instances(progress=None) -> Tuple[str, Dataset]:
-    """Scan for Ollama instances using Shodan and check each endpoint."""
-    if progress is None:
-        progress = gr.Progress()
-    # Get secrets
-    try:
-        shodan_api_key = os.getenv("SHODAN_API_KEY")
-        shodan_query = os.getenv("SHODAN_QUERY")
-        if not shodan_api_key:
-            raise ValueError("SHODAN_API_KEY environment variable is not set")
-        if not shodan_query:
-            raise ValueError("SHODAN_QUERY environment variable is not set")
-    except Exception as e:
-        logger.error(f"Failed to get secrets: {e}")
-        return "Error: Failed to retrieve secrets", None
-    # Initialize Shodan API
-    api = shodan.Shodan(shodan_api_key)
-    try:
-        # Search for Ollama instances
-        logger.info(f"Starting Shodan search with query: {shodan_query}")
-        results = api.search(shodan_query, limit=1000)
-        total_results = results['total']
-        logger.info(f"Found {total_results} results")
-        # Get dataset
-        dataset = get_dataset()
-        # Process results
-        new_instances = 0
-        updated_instances = 0
-        progress(0, desc="Processing Shodan results")
-        for i, result in enumerate(results['matches']):
-            progress((i + 1) / len(results['matches']), desc="Processing Shodan results")
-            ip = result['ip_str']
-            port = result.get('port', 11434)
-            # Check if already in dataset
-            instance_idx = find_instance_index(dataset, ip, port)
-            # Prepare instance data
-            instance_data = {
-                'ip': ip,
-                'port': port,
-                'country': result.get('location', {}).get('country_name', ''),
-                'region': result.get('location', {}).get('region_name', ''),
-                'org': result.get('org', ''),
-                'models': []
-            }
-            # Add or update instance
-            if instance_idx is None:
-                dataset = add_or_update_instance(dataset, instance_data)
-                new_instances += 1
-            else:
-                dataset = add_or_update_instance(dataset, instance_data, update_models=False)
-                updated_instances += 1
-        # Push initial updates
-        push_dataset(dataset)
-        # Check all instances
-        progress(0, desc="Checking Ollama endpoints")
-        instances = [dict(item) for item in dataset]
-        for i, instance in enumerate(instances):
-            progress((i + 1) / len(instances), desc=f"Checking Ollama endpoint {i+1}/{len(instances)}")
-            ip = instance['ip']
-            port = instance['port']
-            # Check endpoint
-            dataset = await check_ollama_endpoint(dataset, ip, port)
-        # Push final updates
-        push_dataset(dataset)
-        return f"Scan completed. Found {total_results} Shodan results. Added {new_instances} new instances, updated {updated_instances} existing instances.", dataset
-    except shodan.APIError as e:
-        logger.error(f"Shodan API error: {e}")
-        return f"Error: Shodan API error - {e}", None
-    except Exception as e:
-        logger.exception("Unexpected error during scan:")
-        return f"Error: {str(e)}", None
-def filter_and_search_models(
-    dataset: Dataset,
-    family: Optional[str] = None,
-    parameter_size: Optional[str] = None,
-    name_search: Optional[str] = None,
-    is_admin: bool = False
-) -> Tuple[pd.DataFrame, List[Dict[str, Any]]]:
-    """Filter and search models in the dataset."""
-    # Extract all models from all instances
-    all_models = []
-    for instance in dataset:
-        ip = instance['ip']
-        port = instance['port']
-        country = instance.get('country', '')
-        region = instance.get('region', '')
-        org = instance.get('org', '')
-        for model in instance.get('models', []):
-            model_info = {
-                'ip': ip,
-                'port': port,
-                'country': country,
-                'region': region,
-                'org': org,
-                'name': model.get('name', ''),
-                'family': model.get('family', ''),
-                'parameter_size': model.get('parameter_size', ''),
-                'quantization_level': model.get('quantization_level', ''),
-                'digest': model.get('digest', ''),
-                'modified_at': model.get('modified_at', ''),
-                'size_bytes': model.get('size', 0),
-                'size_gb': round(model.get('size', 0) / (1024**3), 2) if model.get('size', 0) else 0
-            }
-            all_models.append(model_info)
-    # Apply filters
-    filtered_models = all_models
-    if family and family != "All":
-        filtered_models = [m for m in filtered_models if m['family'] == family]
-    if parameter_size and parameter_size != "All":
-        filtered_models = [m for m in filtered_models if m['parameter_size'] == parameter_size]
-    if name_search:
-        name_search = name_search.lower()
-        filtered_models = [m for m in filtered_models if name_search in m['name'].lower()]
-    # Create DataFrame for display
-    if filtered_models:
-        display_columns = ['name', 'family', 'parameter_size', 'quantization_level', 'size_gb']
-        if is_admin:
-            display_columns = ['ip', 'port', 'country', 'region'] + display_columns
-        df = pd.DataFrame(filtered_models)[display_columns]
-    else:
-        display_columns = ['name', 'family', 'parameter_size', 'quantization_level', 'size_gb']
-        if is_admin:
-            display_columns = ['ip', 'port', 'country', 'region'] + display_columns
-        df = pd.DataFrame(columns=display_columns)
-    return df, filtered_models
-def get_unique_values(dataset: Dataset) -> Tuple[List[str], List[str]]:
-    """Get unique family and parameter size values from the dataset."""
-    families = set()
-    parameter_sizes = set()
-    for instance in dataset:
-        for model in instance.get('models', []):
-            family = model.get('family', '')
-            parameter_size = model.get('parameter_size', '')
-            if family:
-                families.add(family)
-            if parameter_size:
-                parameter_sizes.add(parameter_size)
-    return ["All"] + sorted(list(families)), ["All"] + sorted(list(parameter_sizes))
-# Gradio interface functions
-def login_submit(password: str) -> Tuple[bool, str, str]:
-    """Handle admin login."""
-    try:
-        stored_password = os.getenv("ADMIN_PASSWORD")
-        if not stored_password:
-            logger.error("ADMIN_PASSWORD environment variable is not set")
-            return False, "error", "Admin password is not configured. Please contact the administrator."
-        # Check if stored password is already hashed
-        if stored_password.startswith('$2b
-def search_models(
-    family: str,
-    parameter_size: str,
-    name_search: str,
-    is_admin: bool,
-    dataset: Dataset
-) -> Tuple[pd.DataFrame, List[Dict[str, Any]]]:
-    """Search and filter models in the dataset."""
-    df, details = filter_and_search_models(
-        dataset,
-        family=None if family == "All" else family,
-        parameter_size=None if parameter_size == "All" else parameter_size,
-        name_search=name_search,
-        is_admin=is_admin
-    )
-    return df, details
-def show_model_details(evt: gr.SelectData, models: List[Dict[str, Any]]) -> Dict[str, Any]:
-    """Show details for a selected model."""
-    if not models or evt.index[0] >= len(models):
-        return {}
-    model = models[evt.index[0]]
-    return model
-# Main Gradio application
-def create_app():
-    with gr.Blocks(title="Ollama Scanner", theme=gr.themes.Soft()) as app:
-        # State variables
-        admin_logged_in = gr.State(False)
-        dataset_state = gr.State(get_dataset())
-        model_details_state = gr.State([])
-        # Header
-        gr.Markdown("# 🔍 Ollama Scanner")
-        gr.Markdown("Browse publicly accessible Ollama instances and their models")
-        # Login tab
-        with gr.Tab("Admin Login") as login_tab:
-            with gr.Group():
-                gr.Markdown("### Admin Login")
-                gr.Markdown("Enter the admin password to access administrative features.")
-                admin_password = gr.Textbox(
-                    type="password",
-                    label="Admin Password",
-                    placeholder="Enter admin password"
-                )
-                login_btn = gr.Button("Login", variant="primary")
-                login_status = gr.Markdown("")
-        # Browse Models tab
-        with gr.Tab("Browse Models") as browse_tab:
-            with gr.Row():
-                # Filters column
-                with gr.Column(scale=1):
-                    gr.Markdown("### Filters")
-                    family_dropdown = gr.Dropdown(
-                        choices=["All"],
-                        value="All",
-                        label="Model Family"
-                    )
-                    parameter_size_dropdown = gr.Dropdown(
-                        choices=["All"],
-                        value="All",
-                        label="Parameter Size"
-                    )
-                    name_search = gr.Textbox(
-                        label="Search by Name",
-                        placeholder="Search model names..."
-                    )
-                    search_btn = gr.Button("Search", variant="primary")
-                    stats_box = gr.Markdown("Loading stats...")
-                # Results column
-                with gr.Column(scale=2):
-                    gr.Markdown("### Results")
-                    results_table = gr.DataFrame(
-                        label="Models",
-                        interactive=False
-                    )
-                    model_json_display = gr.JSON(
-                        label="Model Details",
-                        visible=True
-                    )
-        # Shodan Scan tab (admin only)
-        with gr.Tab("Shodan Scan", visible=False) as scan_tab:
-            with gr.Group():
-                gr.Markdown("### Shodan Scan")
-                gr.Markdown("Scan for publicly accessible Ollama instances using Shodan.")
-                scan_btn = gr.Button("Start Scan", variant="primary")
-                scan_progress = gr.Textbox(
-                    label="Scan Status",
-                    placeholder="Click 'Start Scan' to begin scanning...",
-                    interactive=False
-                )
-        # Login logic
-        def handle_login(password):
-            is_admin, status, message = login_submit(password)
-            if is_admin:
-                return is_admin, message, gr.update(visible=True)
-            else:
-                return is_admin, message, gr.update(visible=False)
-        login_btn.click(
-            handle_login,
-            inputs=[admin_password],
-            outputs=[admin_logged_in, login_status, scan_tab]
         )
-        # Search logic
-        def update_stats(dataset):
-            total_instances = len(dataset)
-            models_count = sum(len(instance.get('models', [])) for instance in dataset)
-            families, parameter_sizes = get_unique_values(dataset)
-            family_count = len(families) - 1  # Subtract "All"
-            return f"**Stats:** {total_instances} instances, {models_count} models, {family_count} families"
-        search_btn.click(
-            search_models,
-            inputs=[family_dropdown, parameter_size_dropdown, name_search, admin_logged_in, dataset_state],
-            outputs=[results_table, model_details_state]
-        )
-        # Model selection logic
-        results_table.select(
-            show_model_details,
-            inputs=[model_details_state],
-            outputs=[model_json_display]
-        )
-        # Scan logic
-        async def run_scan():
-            result, updated_dataset = await scan_ollama_instances()
-            if updated_dataset is not None:
-                return result, updated_dataset, *get_unique_values(updated_dataset), update_stats(updated_dataset)
-            else:
-                return result, None, [], [], ""
-        scan_btn.click(
-            run_scan,
-            inputs=[],
-            outputs=[scan_progress, dataset_state, family_dropdown, parameter_size_dropdown, stats_box]
-        )
-        # Initial data load
-        def init_ui(dataset):
-            families, parameter_sizes = get_unique_values(dataset)
-            stats = update_stats(dataset)
-            # Run initial search
-            df, details = search_models("All", "All", "", False, dataset)
-            return families, parameter_sizes, stats, df, details
-        app.load(
-            init_ui,
-            inputs=[dataset_state],
-            outputs=[family_dropdown, parameter_size_dropdown, stats_box, results_table, model_details_state]
-        )
-    return app
-# Start the application
-if __name__ == "__main__":
-    app = create_app()
-    app.launch()
-):
-            is_valid = verify_password(password, stored_password)
-        else:
-            # Compare directly for first-time setup
-            is_valid = password == stored_password
-            # Hash the password for future use
-            if is_valid:
-                hashed_password = hash_password(stored_password)
-                logger.info("Hashed admin password for future use")
-                # Note: We can't store the hashed password back to environment variables
-                # in a Hugging Face Space environment. This would require a different approach.
-        if is_valid:
-            return True, "success", "Login successful! You now have admin access."
-        else:
-            return False, "error", "Invalid password. Please try again."
-    except Exception as e:
-        logger.exception("Login error:")
-        return False, "error", f"Login error: {str(e)}"
-def search_models(
-    family: str,
-    parameter_size: str,
-    name_search: str,
-    is_admin: bool,
-    dataset: Dataset
-) -> Tuple[pd.DataFrame, List[Dict[str, Any]]]:
-    """Search and filter models in the dataset."""
-    df, details = filter_and_search_models(
-        dataset,
-        family=None if family == "All" else family,
-        parameter_size=None if parameter_size == "All" else parameter_size,
-        name_search=name_search,
-        is_admin=is_admin
-    )
-    return df, details
-def show_model_details(evt: gr.SelectData, models: List[Dict[str, Any]]) -> Dict[str, Any]:
-    """Show details for a selected model."""
-    if not models or evt.index[0] >= len(models):
-        return {}
-    model = models[evt.index[0]]
-    return model
-# Main Gradio application
-def create_app():
-    with gr.Blocks(title="Ollama Scanner", theme=gr.themes.Soft()) as app:
         # State variables
-        admin_logged_in = gr.State(False)
-        dataset_state = gr.State(get_dataset())
-        model_details_state = gr.State([])
-        # Header
-        gr.Markdown("# 🔍 Ollama Scanner")
-        gr.Markdown("Browse publicly accessible Ollama instances and their models")
-        # Login tab
-        with gr.Tab("Admin Login") as login_tab:
-            with gr.Group():
-                gr.Markdown("### Admin Login")
-                gr.Markdown("Enter the admin password to access administrative features.")
-                admin_password = gr.Textbox(
-                    type="password",
-                    label="Admin Password",
-                    placeholder="Enter admin password"
-                )
-                login_btn = gr.Button("Login", variant="primary")
-                login_status = gr.Markdown("")
-        # Browse Models tab
-        with gr.Tab("Browse Models") as browse_tab:
             with gr.Row():
-                # Filters column
-                with gr.Column(scale=1):
-                    gr.Markdown("### Filters")
                     family_dropdown = gr.Dropdown(
-                        choices=["All"],
                         value="All",
                         label="Model Family"
                     )
-                    parameter_size_dropdown = gr.Dropdown(
-                        choices=["All"],
                         value="All",
                         label="Parameter Size"
                     )
                     name_search = gr.Textbox(
-                        label="Search by Name",
-                        placeholder="Search model names..."
                     )
-                    search_btn = gr.Button("Search", variant="primary")
-                    stats_box = gr.Markdown("Loading stats...")
-                # Results column
-                with gr.Column(scale=2):
-                    gr.Markdown("### Results")
-                    results_table = gr.DataFrame(
-                        label="Models",
-                        interactive=False
-                    )
-                    model_json_display = gr.JSON(
-                        label="Model Details",
-                        visible=True
-                    )
-        # Shodan Scan tab (admin only)
-        with gr.Tab("Shodan Scan", visible=False) as scan_tab:
-            with gr.Group():
-                gr.Markdown("### Shodan Scan")
-                gr.Markdown("Scan for publicly accessible Ollama instances using Shodan.")
-                scan_btn = gr.Button("Start Scan", variant="primary")
-                scan_progress = gr.Textbox(
-                    label="Scan Status",
-                    placeholder="Click 'Start Scan' to begin scanning...",
-                    interactive=False
-                )
-        # Login logic
-        def handle_login(password):
-            is_admin, status, message = login_submit(password)
-            if is_admin:
-                return is_admin, message, gr.update(visible=True)
-            else:
-                return is_admin, message, gr.update(visible=False)
-        login_btn.click(
-            handle_login,
-            inputs=[admin_password],
-            outputs=[admin_logged_in, login_status, scan_tab]
-        )
-        # Search logic
-        def update_stats(dataset):
-            total_instances = len(dataset)
-            models_count = sum(len(instance.get('models', [])) for instance in dataset)
-            families, parameter_sizes = get_unique_values(dataset)
-            family_count = len(families) - 1  # Subtract "All"
-            return f"**Stats:** {total_instances} instances, {models_count} models, {family_count} families"
-        search_btn.click(
-            search_models,
-            inputs=[family_dropdown, parameter_size_dropdown, name_search, admin_logged_in, dataset_state],
-            outputs=[results_table, model_details_state]
-        )
-        # Model selection logic
-        results_table.select(
-            show_model_details,
-            inputs=[model_details_state],
-            outputs=[model_json_display]
-        )
-        # Scan logic
-        async def run_scan():
-            result, updated_dataset = await scan_ollama_instances()
-            if updated_dataset is not None:
-                return result, updated_dataset, *get_unique_values(updated_dataset), update_stats(updated_dataset)
-            else:
-                return result, None, [], [], ""
-        scan_btn.click(
-            run_scan,
-            inputs=[],
-            outputs=[scan_progress, dataset_state, family_dropdown, parameter_size_dropdown, stats_box]
-        )
-        # Initial data load
-        def init_ui(dataset):
-            families, parameter_sizes = get_unique_values(dataset)
-            stats = update_stats(dataset)
-            # Run initial search
-            df, details = search_models("All", "All", "", False, dataset)
-            return families, parameter_sizes, stats, df, details
-        app.load(
-            init_ui,
-            inputs=[dataset_state],
-            outputs=[family_dropdown, parameter_size_dropdown, stats_box, results_table, model_details_state]
-        )
     return app
-# Start the application
 if __name__ == "__main__":
-    app = create_app()
-    app.launch()

 import os
+import logging
 import requests
+import bcrypt
+import gradio as gr
 from huggingface_hub import HfApi, login
+from datasets import load_dataset, Dataset, Features, Value, Sequence
+from typing import Dict, List, Optional, Any
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import shodan
+import html
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
+logger = logging.getLogger(__name__)
+# Function to access secrets
+def get_secret(name: str) -> str:
+    """
+    Get a secret from Hugging Face Space secrets.
+    Args:
+        name: Name of the secret, i.e. the environment variable to read
+    Returns:
+        The secret's value, or "" (after logging a warning) when it is unset or empty
+    """
+    # Spaces expose configured secrets to the app as environment variables;
+    # huggingface_hub has no `secrets` module to import them from, so the
+    # previous import always failed and this function always returned "".
+    value = os.environ.get(name, "")
+    if not value:
+        logger.warning(f"Secret {name} is not set")
+    return value
+# Sanitize inputs
+def sanitize_input(text: str) -> str:
+    """Return *text* HTML-escaped; any falsy value (None, "") becomes ""."""
+    # html.escape replaces &, <, > and both quote characters with entities.
+    return html.escape(text) if text else ""
+# Dataset functions
+def get_or_create_dataset(repo_id: str, token: Optional[str] = None) -> Dataset:
+    """
+    Get or create a dataset with the specified schema.
+    Args:
+        repo_id: The Hugging Face dataset repository ID
+        token: Authentication token for private datasets
+    Returns:
+        The "train" split of the loaded dataset, or a new empty dataset
+        (already pushed to repo_id) when loading fails
+    Raises:
+        Any error from push_to_hub while uploading the new empty dataset;
+        it is not caught here
+    """
     try:
+        # Try to load the dataset
+        dataset = load_dataset(repo_id, token=token)
         return dataset["train"]
     except Exception as e:
+        logger.info(f"Dataset {repo_id} not found or error loading it: {e}")
+        # NOTE(review): every load failure lands here, not only "not found";
+        # a transient error would push an empty dataset to repo_id - confirm
+        # that cannot overwrite existing data.
+        # Value/Sequence accept no `default_value` keyword (passing one raises
+        # TypeError), so the schema declares types only.
+        features = Features({
+            "ip": Value("string"),
+            "port": Value("int32"),
+            "country": Value("string"),
+            "region": Value("string"),
+            "org": Value("string"),
+            "models": Sequence({
+                "name": Value("string"),
+                "family": Value("string"),
+                "parameter_size": Value("string"),
+                "quantization_level": Value("string"),
+                "digest": Value("string"),
+                "modified_at": Value("string"),
+                "size": Value("int64")
+            })
+        })
+        # Create empty dataset: one empty column per schema field
+        empty_dataset = Dataset.from_dict(
+            {column: [] for column in features},
+            features=features
+        )
+        # Push to hub
+        empty_dataset.push_to_hub(repo_id, token=token)
+        return empty_dataset
+def update_dataset_entry(dataset: Dataset, entry: Dict[str, Any]) -> Dataset:
+    """
+    Add or update an entry in the dataset.
+    Args:
+        dataset: The dataset to update
+        entry: The entry to add or update; must contain "ip" and "port", and
+            every dataset column when it replaces an existing row
+    Returns:
+        A new dataset in which the first row with the same ip and port is
+        replaced by entry, or entry is appended when no row matches
+    """
+    ip = entry["ip"]
+    port = entry["port"]
+    # Materialize the rows once; they serve both the lookup and the rebuild
+    rows = list(dataset)
+    existing_idx = next(
+        (idx for idx, row in enumerate(rows)
+         if row["ip"] == ip and row["port"] == port),
+        None
+    )
+    if existing_idx is None:
+        # Add new entry
+        return dataset.add_item(entry)
+    # Update existing entry
+    rows[existing_idx] = entry
+    # Pass the original features so column types are kept instead of being
+    # re-inferred from the row values
+    return Dataset.from_dict(
+        {k: [row[k] for row in rows] for k in dataset.column_names},
+        features=dataset.features
+    )
+def push_dataset_to_hub(dataset: Dataset, repo_id: str, token: Optional[str] = None):
+    """
+    Upload a dataset to the Hugging Face Hub, logging the outcome.
+    Failures are logged and swallowed, so nothing is raised to the caller.
+    Args:
+        dataset: The dataset to push
+        repo_id: The repository ID
+        token: Authentication token
+    """
+    try:
+        dataset.push_to_hub(repo_id, token=token)
+    except Exception as e:
+        logger.error(f"Error pushing dataset to hub: {e}")
+    else:
+        logger.info(f"Successfully pushed dataset to {repo_id}")
+# Shodan functions
def scan_with_shodan(shodan_api_key: str, query: str, max_results: int = 1000) -> List[Dict[str, Any]]:
    """
    Scan with Shodan API for Ollama instances.

    Results are fetched page by page. If a page fails, the instances collected
    so far are returned. If the initial count request fails, an empty list is
    returned.

    Args:
        shodan_api_key: Shodan API key
        query: Shodan search query
        max_results: Maximum number of results to return
    Returns:
        List of discovered instances (at most max_results). Each instance is a
        dict with "ip", "port", "country", "region", "org" and an empty
        "models" list to be populated later.
    """
    if not shodan_api_key:
        logger.error("No Shodan API key provided")
        return []
    # UI sliders can hand over floats; range() below needs an int
    limit = int(max_results)
    if limit <= 0:
        return []
    # The paging arithmetic assumes 100 matches per search page
    page_size = 100
    try:
        api = shodan.Shodan(shodan_api_key)
        results = []
        # Get the number of total results
        total_results = api.count(query)['total']
        logger.info(f"Found {total_results} results for query: {query}")
        # Limit to max_results (ceil division gives the number of pages needed)
        wanted = min(total_results, limit)
        pages = -(-wanted // page_size)
        for page in range(1, pages + 1):
            try:
                result_page = api.search(query, page=page)
            except shodan.APIError as e:
                logger.error(f"Shodan API error on page {page}: {e}")
                break
            for match in result_page['matches']:
                # "location" may be present but null
                location = match.get("location") or {}
                results.append({
                    "ip": match.get("ip_str", ""),
                    "port": match.get("port", 11434),  # Default Ollama port
                    "country": location.get("country_name", ""),
                    "region": location.get("region_name", ""),
                    "org": match.get("org", ""),
                    "models": [],  # Will be populated later
                })
            logger.info(f"Processed page {page}/{pages}")
        # Whole pages are fetched, so trim the overshoot from the last page
        return results[:wanted]
    except shodan.APIError as e:
        logger.error(f"Shodan API error: {e}")
        return []
+# Ollama endpoint checking
def check_ollama_endpoint(instance: Dict[str, Any], timeout: int = 5) -> Dict[str, Any]:
    """
    Check an Ollama endpoint and extract model information.

    Queries http://<ip>:<port>/api/tags. Any failure (network error, HTTP error
    status, invalid JSON, unexpected payload) is logged and yields an empty
    "models" list; the function does not raise for those cases.

    Args:
        instance: Instance information (ip, port, etc.)
        timeout: Request timeout in seconds
    Returns:
        Updated instance with model information (a shallow copy; the passed-in
        dict is not modified)
    """
    ip = instance["ip"]
    port = instance["port"]
    url = f"http://{ip}:{port}/api/tags"
    updated_instance = instance.copy()
    # Default to "no models" so every failure path below can simply return
    updated_instance["models"] = []
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        logger.error(f"Network error for {ip}:{port}: {e}")
        return updated_instance
    # Decoding is handled separately from the request: recent requests versions
    # raise a JSON error that is *also* a RequestException, which would
    # otherwise be reported as a network error.
    try:
        data = response.json()
    except ValueError as e:
        logger.error(f"Invalid JSON from {ip}:{port}: {e}")
        return updated_instance
    try:
        if isinstance(data, dict) and "models" in data:
            raw_models = data["models"] or []
            # The payload comes from an arbitrary remote host: skip malformed
            # entries rather than discarding the whole host.
            models_list = [
                _parse_model_entry(model)
                for model in raw_models
                if isinstance(model, dict)
            ]
            updated_instance["models"] = models_list
            logger.info(f"Successfully extracted {len(models_list)} models from {ip}:{port}")
        else:
            logger.warning(f"No models found in response from {ip}:{port}")
    except Exception as e:
        # Last-resort guard so a worker thread never fails on a hostile payload
        logger.exception(f"Unexpected error for {ip}:{port}: {e}")
        updated_instance["models"] = []
    return updated_instance


def _parse_model_entry(model: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one entry of an /api/tags "models" list into a model record."""
    details = model.get("details")
    if not isinstance(details, dict):
        # "details" may be missing or null
        details = {}
    size = model.get("size", 0)
    if not isinstance(size, int) or isinstance(size, bool):
        size = 0
    return {
        "name": model.get("name") or "",
        "family": details.get("family") or "",
        "parameter_size": details.get("parameter_size") or "",
        "quantization_level": details.get("quantization_level") or "",
        "digest": model.get("digest") or "",
        "modified_at": model.get("modified_at") or "",
        "size": size,
    }
+# Authentication functions
def verify_password(password: str, stored_password: str) -> bool:
    """
    Verify if the entered password matches the stored password.

    Args:
        password: The entered password
        stored_password: The stored password (bcrypt hash or plaintext)
    Returns:
        True if passwords match, False otherwise (including when either value
        is missing or the stored bcrypt hash is malformed)
    """
    import hmac  # local import: only needed for the constant-time comparison

    if not isinstance(password, str) or not isinstance(stored_password, str):
        return False
    # Sanitize input
    password = sanitize_input(password)
    entered = password.encode('utf-8')
    stored = stored_password.encode('utf-8')
    # Check if stored password is a bcrypt hash (any of the common prefixes)
    if stored_password.startswith(('$2a$', '$2b$', '$2y$')):
        try:
            return bcrypt.checkpw(entered, stored)
        except ValueError:
            # bcrypt rejects malformed hashes with ValueError; treat as no match
            logger.error("Stored password hash is malformed")
            return False
    # Direct comparison for development/testing, done in constant time so the
    # comparison does not leak how many leading characters matched
    return hmac.compare_digest(entered, stored)
+# UI creation
def create_ui():
    """
    Create the Gradio UI for the application.

    Reads the ADMIN_PASSWORD, SHODAN_API_KEY, SHODAN_QUERY and HF_TOKEN secrets,
    loads (or creates) the results dataset and builds three tabs: a model
    browser, an admin login and an admin-only Shodan scan.

    Returns:
        Gradio interface
    """
    bytes_per_gb = 1024 * 1024 * 1024
    # Get secrets
    admin_password = get_secret("ADMIN_PASSWORD")
    if not admin_password:
        # NOTE(review): a well-known fallback password grants scan access to
        # anyone when the secret is missing; set ADMIN_PASSWORD in production.
        admin_password = "admin"  # Default for development (should be replaced in production)
        logger.warning("Admin password not set, using default (insecure)")
    shodan_api_key = get_secret("SHODAN_API_KEY")
    if not shodan_api_key:
        logger.warning("Shodan API key not set, scans will not work")
    shodan_query = get_secret("SHODAN_QUERY")
    if not shodan_query:
        shodan_query = "product:Ollama port:11434"
        logger.info(f"Using default Shodan query: {shodan_query}")
    hf_token = get_secret("HF_TOKEN")
    # Load dataset
    dataset_repo_id = "latterworks/llama_checker_results"
    dataset = get_or_create_dataset(dataset_repo_id, token=hf_token)

    def size_in_gb(size_bytes):
        """Convert a byte count to GB rounded to 2 decimals (0 when unknown)."""
        return round(size_bytes / bytes_per_gb, 2) if size_bytes else 0

    def iter_models(rows):
        """Yield (instance, model) pairs for every model stored in the rows."""
        for item in rows:
            for model in item["models"] or []:
                yield item, model

    # Function to search and display models
    def search_models(family, param_size, name, current_dataset):
        # Sanitize inputs
        name = sanitize_input(name)
        results = []
        for _, model in iter_models(current_dataset):
            # Apply filters
            if family != "All" and model["family"] != family:
                continue
            if param_size != "All" and model["parameter_size"] != param_size:
                continue
            if name and name.lower() not in model["name"].lower():
                continue
            results.append([
                model["name"],
                model["family"],
                model["parameter_size"],
                model["quantization_level"],
                size_in_gb(model["size"]),
            ])
        return results

    # Function to display model details
    def show_model_details(evt: gr.SelectData, results, is_admin, current_dataset):
        selected_row = evt.index[0]
        try:
            # A Dataframe input arrives as a pandas DataFrame by default, where
            # results[row] would be a *column* lookup; use positional access.
            if hasattr(results, "iloc"):
                model_name = results.iloc[selected_row, 0]
            else:
                model_name = results[selected_row][0]
        except (IndexError, KeyError, TypeError):
            return {"error": "Model not found"}
        # Find the model. NOTE: rows carry no host identity, so the first
        # instance serving a model with this name is the one reported.
        for item, model in iter_models(current_dataset):
            if model["name"] != model_name:
                continue
            details = {
                "name": model["name"],
                "family": model["family"],
                "parameter_size": model["parameter_size"],
                "quantization_level": model["quantization_level"],
                "digest": model["digest"],
                "modified_at": model["modified_at"],
                "size_bytes": model["size"],
                "size_gb": size_in_gb(model["size"]),
            }
            # Include IP and port for admin users
            if is_admin:
                for key in ("ip", "port", "country", "region", "org"):
                    details[key] = item[key]
            return details
        return {"error": "Model not found"}

    # Function to update admin visibility
    def update_admin_visibility(is_admin):
        return (
            gr.update(visible=not is_admin),  # admin_required
            gr.update(visible=is_admin)       # scan_group
        )

    # Function to perform scan
    def perform_scan(max_results, is_admin, current_dataset):
        if not is_admin:
            # This is a generator: the message must be yielded, a returned
            # value would never reach the UI.
            yield "⚠️ Admin login required", [], current_dataset
            return
        # Start scan
        yield "🔍 Starting Shodan scan...", [], current_dataset
        try:
            # Get instances from Shodan (slider values may arrive as floats)
            instances = scan_with_shodan(shodan_api_key, shodan_query, int(max_results))
            yield f"🔍 Found {len(instances)} instances. Checking endpoints...", [], current_dataset
            # Check endpoints concurrently; the work is network-bound
            updated_instances = []
            with ThreadPoolExecutor(max_workers=10) as executor:
                future_to_instance = {
                    executor.submit(check_ollama_endpoint, instance): instance
                    for instance in instances
                }
                for future in as_completed(future_to_instance):
                    try:
                        updated_instances.append(future.result())
                    except Exception as e:
                        instance = future_to_instance[future]
                        logger.exception(f"Error processing {instance['ip']}:{instance['port']}: {e}")
                        # Keep the instance, without model info, as a copy so
                        # the dict shared with the worker is not mutated
                        updated_instances.append({**instance, "models": []})
            # Update dataset
            updated_dataset = current_dataset
            for instance in updated_instances:
                updated_dataset = update_dataset_entry(updated_dataset, instance)
            # Push to hub
            push_dataset_to_hub(updated_dataset, dataset_repo_id, token=hf_token)
            # Prepare results for display
            results = []
            total_models = 0
            for instance in updated_instances:
                models_count = len(instance["models"]) if instance["models"] else 0
                total_models += models_count
                results.append([
                    instance["ip"],
                    instance["port"],
                    instance["country"],
                    instance["region"],
                    instance["org"],
                    models_count,
                ])
            yield f"✅ Scan completed! Found {len(instances)} instances with a total of {total_models} models.", results, updated_dataset
        except Exception as e:
            logger.exception(f"Error during scan: {e}")
            yield f"❌ Error during scan: {str(e)}", [], current_dataset

    # Create Gradio UI
    with gr.Blocks(title="Ollama Instance Scanner") as app:
        # State variables
        current_dataset = gr.State(dataset)
        is_admin = gr.State(False)
        with gr.Tab("Browse Models"):
            # Filters
            with gr.Row():
                with gr.Column():
                    # Extract unique values for family and parameter_size
                    # (computed once at build time from the loaded dataset)
                    families = set()
                    parameter_sizes = set()
                    for _, model in iter_models(dataset):
                        if model["family"]:
                            families.add(model["family"])
                        if model["parameter_size"]:
                            parameter_sizes.add(model["parameter_size"])
                    # Create dropdowns
                    family_dropdown = gr.Dropdown(
                        choices=["All"] + sorted(families),
                        value="All",
                        label="Model Family"
                    )
                    param_size_dropdown = gr.Dropdown(
                        choices=["All"] + sorted(parameter_sizes),
                        value="All",
                        label="Parameter Size"
                    )
                    name_search = gr.Textbox(
                        value="",
                        label="Model Name Contains"
                    )
            # Search button
            search_btn = gr.Button("Search")
            # Results dataframe
            results_df = gr.DataFrame(
                value=[],
                headers=["Name", "Family", "Parameter Size", "Quantization", "Size (GB)"],
                label="Model Results"
            )
            # Detailed info JSON viewer
            model_details = gr.JSON(label="Model Details")
            # Connect events
            search_btn.click(
                search_models,
                inputs=[family_dropdown, param_size_dropdown, name_search, current_dataset],
                outputs=[results_df]
            )
            results_df.select(
                show_model_details,
                inputs=[results_df, is_admin, current_dataset],
                outputs=[model_details]
            )
        with gr.Tab("Admin Login"):
            admin_pw_input = gr.Textbox(
                value="",
                type="password",
                label="Admin Password"
            )
            login_btn = gr.Button("Login")
            login_status = gr.Markdown("Not logged in")

            def admin_login(password):
                if verify_password(password, admin_password):
                    return "✅ Successfully logged in as admin", True
                return "❌ Invalid password", False

            login_btn.click(
                admin_login,
                inputs=[admin_pw_input],
                outputs=[login_status, is_admin]
            )
        with gr.Tab("Shodan Scan"):
            # The scan controls are initially hidden and only shown to admins
            admin_required = gr.Markdown("⚠️ Admin login required to access this feature")
            with gr.Group(visible=False) as scan_group:
                max_results = gr.Slider(
                    minimum=10,
                    maximum=1000,
                    value=100,
                    step=10,
                    label="Max Results"
                )
                scan_btn = gr.Button("Start Scan")
                scan_status = gr.Markdown("Ready to scan")
                # Admin results dataframe with IP and port
                admin_results_df = gr.DataFrame(
                    value=[],
                    headers=["IP", "Port", "Country", "Region", "Organization", "Models Count"],
                    label="Scan Results"
                )
            # Connect events
            is_admin.change(
                update_admin_visibility,
                inputs=[is_admin],
                outputs=[admin_required, scan_group]
            )
            scan_btn.click(
                perform_scan,
                inputs=[max_results, is_admin, current_dataset],
                outputs=[scan_status, admin_results_df, current_dataset]
            )
    return app
+# Main function
def main():
    """Build the Gradio interface and start serving it."""
    create_ui().launch()


# Only launch when executed as a script, not when imported
if __name__ == "__main__":
    main()