latterworks committed on
Commit
58a3354
·
verified ·
1 Parent(s): 4cda29c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +414 -309
app.py CHANGED
@@ -1,421 +1,526 @@
1
  import os
2
  import logging
3
- import asyncio
4
- import time
5
- from typing import Dict, List, Optional, Any, Tuple
6
-
7
- import gradio as gr
8
  import datasets
9
  import shodan
10
- import requests
 
 
 
 
11
 
12
- # Set up logging
13
- logging.basicConfig(
14
- level=logging.INFO,
15
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
16
- )
17
  logger = logging.getLogger(__name__)
18
 
 
 
 
 
 
 
19
 
20
  def load_or_create_dataset():
21
- """
22
- Load or create the dataset.
23
-
24
- Returns:
25
- HuggingFace dataset
26
- """
27
  hf_token = os.getenv("HF_TOKEN")
28
- if not hf_token:
29
- raise ValueError("HF_TOKEN environment variable is not set")
30
 
31
  try:
32
- dataset = datasets.load_dataset("latterworks/llama_checker_results", use_auth_token=hf_token)
33
- # Convert to in-memory dataset for easier manipulation
34
- dataset = dataset['train']
 
 
 
 
 
 
 
 
35
  except FileNotFoundError:
36
- # Dataset doesn't exist, create it
37
- dataset = datasets.Dataset.from_dict({"ip": [], "port": [], "country": [], "region": [], "org": [], "models": []})
38
- dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
39
- dataset = datasets.load_dataset("latterworks/llama_checker_results", use_auth_token=hf_token)['train']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  except Exception as e:
41
- logging.error(f"Failed to load or create dataset: {e}")
42
- raise # Re-raise the exception to stop the application
43
-
44
- return dataset
45
-
46
 
47
  def scan_shodan(progress=gr.Progress()) -> List[Dict]:
48
  """
49
- Scan Shodan for Ollama instances.
50
 
51
  Args:
52
- progress: Gradio progress bar
53
-
54
  Returns:
55
- List of dictionaries containing information about Ollama instances
56
  """
57
- # Validate Shodan API key exists
58
- shodan_api_key = os.getenv("SHODAN_API_KEY")
59
- if not shodan_api_key:
60
- raise ValueError("SHODAN_API_KEY environment variable is not set")
61
-
62
- # Get Shodan query
63
  shodan_query = os.getenv("SHODAN_QUERY", "product:Ollama port:11434")
64
 
65
- # Initialize Shodan API
66
- api = shodan.Shodan(shodan_api_key)
67
 
68
  try:
69
- # Search Shodan
70
- logger.info(f"Searching Shodan with query: {shodan_query}")
71
- results = api.search(shodan_query)
 
72
 
73
- # Process results
74
  instances = []
75
- total_results = results['total']
76
- logger.info(f"Found {total_results} results")
77
 
78
- # Set up progress bar
79
- progress(0, desc="Scanning Shodan for Ollama instances")
80
 
81
- for i, result in enumerate(results['matches']):
82
- progress((i+1)/len(results['matches']), desc=f"Processing result {i+1}/{len(results['matches'])}")
83
-
84
- instance = {
85
- 'ip': result['ip_str'],
86
- 'port': result.get('port', 11434),
87
- 'country': result.get('location', {}).get('country_name'),
88
- 'region': result.get('location', {}).get('region_name'),
89
- 'org': result.get('org'),
90
- 'models': []
91
- }
92
- instances.append(instance)
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  return instances
95
-
96
  except shodan.APIError as e:
97
- logger.error(f"Shodan API error: {e}")
98
- raise
 
 
 
 
 
 
 
 
99
  except Exception as e:
100
- logger.error(f"Error during Shodan scan: {e}")
101
  raise
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- async def check_single_endpoint(ip: str, port: int) -> Optional[List[Dict]]:
105
  """
106
- Check a single Ollama endpoint for available models.
107
 
108
  Args:
109
- ip: IP address of the endpoint
110
- port: Port number of the endpoint
111
-
112
  Returns:
113
- List of models if successful, None otherwise
114
  """
115
- url = f"http://{ip}:{port}/api/tags"
 
 
 
116
 
117
- try:
118
- # Set a timeout of 5 seconds
119
- response = requests.get(url, timeout=5)
120
- if response.status_code == 200:
121
- data = response.json()
122
- return data.get('models', [])
123
- else:
124
- logger.warning(f"Failed to get models from {ip}:{port}, status code: {response.status_code}")
125
- return None
126
- except requests.exceptions.RequestException as e:
127
- logger.warning(f"Error connecting to {ip}:{port}: {e}")
128
- return None
129
- except Exception as e:
130
- logger.warning(f"Unexpected error checking {ip}:{port}: {e}")
131
- return None
132
-
133
 
134
- async def check_ollama_endpoints(instances: List[Dict], dataset, progress=gr.Progress()) -> datasets.Dataset:
135
  """
136
- Check all Ollama endpoints for available models asynchronously.
137
 
138
  Args:
139
- instances: List of dictionaries containing information about Ollama instances
140
  dataset: HuggingFace dataset
141
- progress: Gradio progress bar
142
 
143
  Returns:
144
- Updated dataset
145
  """
146
- # Validate HF token exists
147
- hf_token = os.getenv("HF_TOKEN")
148
- if not hf_token:
149
- raise ValueError("HF_TOKEN environment variable is not set")
150
-
151
- # Convert dataset to dictionary for easier manipulation
152
- dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset}
153
 
154
  # Process each instance
155
- progress(0, desc="Checking Ollama endpoints")
156
- for i, instance in enumerate(instances):
157
- progress((i+1)/len(instances), desc=f"Checking endpoint {i+1}/{len(instances)}")
158
-
159
- ip = instance['ip']
160
- port = instance['port']
161
- key = f"{ip}:{port}"
162
-
163
- # Get models from the endpoint
164
- models = await check_single_endpoint(ip, port)
165
 
166
- if models:
167
- # Update instance with models
168
- instance['models'] = models
 
 
169
 
170
- # Update or add to dataset dictionary
171
- dataset_dict[key] = instance
 
 
 
 
 
 
172
 
173
- # Convert back to dataset
174
- updated_dataset = datasets.Dataset.from_dict({
175
- "ip": [item['ip'] for item in dataset_dict.values()],
176
- "port": [item['port'] for item in dataset_dict.values()],
177
- "country": [item.get('country', '') for item in dataset_dict.values()],
178
- "region": [item.get('region', '') for item in dataset_dict.values()],
179
- "org": [item.get('org', '') for item in dataset_dict.values()],
180
- "models": [item.get('models', []) for item in dataset_dict.values()]
181
- })
182
 
183
- # Push updates to hub
 
 
 
 
184
  updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
185
 
 
 
186
  return updated_dataset
187
 
188
-
189
- def get_unique_values(dataset) -> Tuple[List[str], List[str], List[str]]:
190
  """
191
- Get unique values for family, parameter_size, and name.
192
 
193
  Args:
194
  dataset: HuggingFace dataset
195
-
196
  Returns:
197
- Tuple of lists containing unique values for family, parameter_size, and name
198
  """
 
199
  families = set()
200
  parameter_sizes = set()
201
- names = set()
202
-
203
- for item in dataset:
204
- for model in item.get('models', []):
205
- if 'family' in model and model['family']:
206
- families.add(model['family'])
207
- if 'parameter_size' in model and model['parameter_size']:
208
- parameter_sizes.add(model['parameter_size'])
209
- if 'name' in model and model['name']:
210
- names.add(model['name'])
211
 
212
- # Convert to sorted lists and add empty option
213
- families = [''] + sorted(list(families))
214
- parameter_sizes = [''] + sorted(list(parameter_sizes))
215
- names = sorted(list(names))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- return families, parameter_sizes, names
218
-
 
 
219
 
220
- def search_models(
221
- dataset,
222
- family: str = "",
223
- parameter_size: str = "",
224
- name: str = "",
225
- is_admin: bool = False
226
- ) -> Tuple[List[Dict], List[Dict]]:
227
  """
228
- Search models based on criteria.
229
 
230
  Args:
231
  dataset: HuggingFace dataset
232
  family: Filter by model family
233
  parameter_size: Filter by parameter size
234
- name: Filter by model name
235
- is_admin: Whether the user is an admin
236
-
237
  Returns:
238
- Tuple of (results, selected model info)
239
  """
240
  results = []
241
 
242
- for item in dataset:
243
- for model in item.get('models', []):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  # Apply filters
245
- if family and model.get('family', '') != family:
246
  continue
247
- if parameter_size and model.get('parameter_size', '') != parameter_size:
 
248
  continue
249
- if name and name.lower() not in model.get('name', '').lower():
 
250
  continue
251
 
252
- # Create result with model info
253
  result = {
254
- 'name': model.get('name', ''),
255
- 'family': model.get('family', ''),
256
- 'parameter_size': model.get('parameter_size', ''),
257
- 'quantization_level': model.get('quantization_level', ''),
258
- 'size': round(model.get('size', 0) / (1024**3), 2) # Convert to GB
 
 
 
259
  }
260
 
261
- # Add IP and port only for admin users
 
 
 
262
  if is_admin:
263
- result['ip'] = item['ip']
264
- result['port'] = item['port']
265
 
266
  results.append(result)
267
 
268
- # For empty result, return empty JSON info
269
- selected_model_info = [{}]
270
-
271
- return results, selected_model_info
272
-
273
-
274
- def get_model_info(model_row: Dict) -> Dict:
275
- """
276
- Get detailed information about a selected model.
277
-
278
- Args:
279
- model_row: Selected model row from the results
280
-
281
- Returns:
282
- Dictionary containing detailed model information
283
- """
284
- return model_row
285
-
286
 
287
  def create_interface():
288
- """
289
- Create Gradio interface for the application.
290
-
291
- Returns:
292
- Gradio interface
293
- """
294
- # Load or create dataset
295
- dataset = load_or_create_dataset()
296
-
297
- # Check for admin mode
298
- is_admin = os.getenv("ADMIN_MODE", "false").lower() == "true"
299
-
300
- # Get unique values for dropdown menus
301
- families, parameter_sizes, names = get_unique_values(dataset)
302
-
303
- # Get initial search results
304
- initial_results, initial_model_info = search_models(dataset, is_admin=is_admin)
305
-
306
- # Function to run Shodan scan
307
- def run_shodan_scan(progress=gr.Progress()):
308
- nonlocal dataset
309
- instances = scan_shodan(progress)
310
- dataset = asyncio.run(check_ollama_endpoints(instances, dataset, progress))
311
 
312
- # Update unique values
313
- updated_families, updated_parameter_sizes, updated_names = get_unique_values(dataset)
314
 
315
- # Update search results
316
- updated_results, updated_model_info = search_models(dataset, is_admin=is_admin)
317
 
318
- return (
319
- updated_families, updated_parameter_sizes,
320
- updated_results, updated_model_info
321
- )
322
-
323
- # Function to run model search
324
- def run_search(family, parameter_size, name):
325
- results, model_info = search_models(dataset, family, parameter_size, name, is_admin=is_admin)
326
- return results, model_info
327
-
328
- # Function to get model details when a row is selected
329
- def select_model(evt: gr.SelectData, results):
330
- if evt.index[0] < len(results):
331
- selected = results[evt.index[0]]
332
- return selected
333
- return {}
334
-
335
- # Create Gradio interface
336
- with gr.Blocks(title="Ollama Instance Scanner") as interface:
337
- gr.Markdown("# Ollama Instance Scanner")
338
-
339
- with gr.Tabs():
340
- # Browse Models tab
341
- with gr.TabItem("Browse Models"):
342
- with gr.Row():
343
- with gr.Column():
344
- family_dropdown = gr.Dropdown(
345
- choices=families,
346
- label="Model Family",
347
- value=""
348
- )
349
- parameter_size_dropdown = gr.Dropdown(
350
- choices=parameter_sizes,
351
- label="Parameter Size",
352
- value=""
353
- )
354
- name_search = gr.Textbox(
355
- label="Model Name",
356
- placeholder="Search by name..."
357
  )
358
- search_button = gr.Button("Search")
 
 
359
 
360
- results_df = gr.DataFrame(
361
- value=initial_results,
362
- label="Search Results",
363
- headers=["name", "family", "parameter_size", "quantization_level", "size"],
364
- row_count=10,
365
- interactive=False
366
- )
 
 
 
 
 
 
 
367
 
368
- model_info = gr.JSON(
369
- value=initial_model_info[0] if initial_model_info else {},
370
- label="Model Details"
371
- )
372
 
373
- # Event handlers
374
- search_button.click(
375
- fn=run_search,
376
- inputs=[family_dropdown, parameter_size_dropdown, name_search],
377
- outputs=[results_df, model_info]
378
- )
379
 
380
- results_df.select(
381
- fn=select_model,
382
- inputs=[results_df],
383
- outputs=[model_info]
384
- )
385
 
386
- # Shodan Scan tab
387
- with gr.TabItem("Shodan Scan"):
388
- # Check if Shodan API key is available
389
- shodan_api_key = os.getenv("SHODAN_API_KEY")
390
-
391
- if shodan_api_key:
392
- scan_button = gr.Button("Start Scan")
393
- scan_output = gr.Markdown("Press the button to start scanning Shodan for Ollama instances.")
 
 
 
 
 
 
394
 
395
- # Event handlers
396
- scan_button.click(
397
- fn=run_shodan_scan,
398
- outputs=[
399
- family_dropdown, parameter_size_dropdown,
400
- results_df, model_info
401
- ]
402
- )
403
- else:
404
- gr.Markdown("## Shodan API key not configured")
405
- gr.Markdown(
406
- "To use the Shodan scan feature, you need to set the `SHODAN_API_KEY` "
407
- "environment variable in your Hugging Face Space settings."
408
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
- return interface
411
-
 
412
 
413
  def main():
414
  """Main function to run the application."""
415
- # Create and launch interface
416
- interface = create_interface()
417
- interface.launch()
418
-
 
 
 
 
419
 
420
  if __name__ == "__main__":
421
  main()
 
1
  import os
2
  import logging
 
 
 
 
 
3
  import datasets
4
  import shodan
5
+ import asyncio
6
+ import aiohttp
7
+ import json
8
+ import gradio as gr
9
+ from typing import List, Dict, Any, Optional
10
 
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
 
 
13
  logger = logging.getLogger(__name__)
14
 
15
def validate_env_variables():
    """Ensure every environment variable the app depends on is present.

    Checks SHODAN_API_KEY and HF_TOKEN up front so failures surface at
    startup instead of mid-scan.

    Raises:
        ValueError: naming each variable that is unset or empty.
    """
    missing = [name for name in ("SHODAN_API_KEY", "HF_TOKEN") if not os.getenv(name)]
    if missing:
        raise ValueError(f"Missing required environment variables: {', '.join(missing)}")
21
 
22
def load_or_create_dataset():
    """Load the dataset from HuggingFace or create it if it doesn't exist.

    Validates required env vars first, then loads
    ``latterworks/llama_checker_results`` from the Hub. If the dataset is
    missing, pushes an empty dataset with the expected schema
    (ip/port/country/region/org/models) and reloads it.

    Returns:
        The 'train' split if present, otherwise the first available split.

    Raises:
        ValueError: from validate_env_variables() when env vars are missing.
        Exception: any other load/push failure is logged and re-raised.
    """
    validate_env_variables()
    hf_token = os.getenv("HF_TOKEN")

    try:
        logger.info("Attempting to load dataset from HuggingFace Hub")
        # NOTE(review): `use_auth_token` is deprecated in recent `datasets`
        # releases in favor of `token=` — confirm the pinned version.
        dataset = datasets.load_dataset(
            "latterworks/llama_checker_results",
            use_auth_token=hf_token
        )
        if "train" in dataset:
            return dataset["train"]
        else:
            # If there's no train split, just take the first available split
            return dataset[next(iter(dataset))]

    # NOTE(review): this assumes a missing Hub dataset surfaces as
    # FileNotFoundError (DatasetNotFoundError subclasses it in recent
    # `datasets` versions) — verify against the installed version.
    except FileNotFoundError:
        logger.info("Dataset not found, creating a new one")
        # Create an empty dataset with the required schema
        empty_dataset = datasets.Dataset.from_dict({
            "ip": [],
            "port": [],
            "country": [],
            "region": [],
            "org": [],
            "models": []
        })

        # Push the empty dataset to HuggingFace Hub
        empty_dataset.push_to_hub(
            "latterworks/llama_checker_results",
            token=hf_token
        )

        # Load the newly created dataset
        dataset = datasets.load_dataset(
            "latterworks/llama_checker_results",
            use_auth_token=hf_token
        )

        if "train" in dataset:
            return dataset["train"]
        else:
            return dataset[next(iter(dataset))]

    except Exception as e:
        logger.error(f"Failed to load or create dataset: {e}")
        raise
 
 
 
71
 
72
def _shodan_result_to_instance(result: Dict) -> Dict:
    """Map one raw Shodan match onto the dataset's instance schema."""
    location = result.get('location', {})
    return {
        'ip': result.get('ip_str'),
        'port': result.get('port', 11434),
        'country': location.get('country_name'),
        'region': location.get('region_name'),
        'org': result.get('org'),
        'models': [],
    }


def scan_shodan(progress=gr.Progress()) -> List[Dict]:
    """
    Scan Shodan for Ollama instances using search_cursor for comprehensive result retrieval.

    Args:
        progress: Gradio progress bar for visual feedback

    Returns:
        List of Ollama instances from Shodan with comprehensive metadata

    Raises:
        ValueError: on an invalid API key or when the rate limit is hit.
        shodan.APIError: for any other Shodan API failure.
    """
    # API key fetch - validation is centralized at startup (validate_env_variables)
    api_key = os.getenv("SHODAN_API_KEY")
    shodan_query = os.getenv("SHODAN_QUERY", "product:Ollama port:11434")

    api = shodan.Shodan(api_key)

    try:
        logger.info(f"Executing Shodan search_cursor with query: {shodan_query}")

        # search_cursor handles pagination transparently
        cursor = api.search_cursor(shodan_query)

        instances = []
        progress(0, desc="Initializing Shodan data retrieval")

        # The previous implementation buffered matches into a 100-item batch
        # and then copied them out with two identical extraction loops (batch
        # flush + remainder).  Appending directly is equivalent and removes
        # the duplicated code.
        for processed, result in enumerate(cursor, start=1):
            instances.append(_shodan_result_to_instance(result))
            if processed % 100 == 0:
                # Total is unknown while the cursor streams, so this fraction
                # is a heuristic that asymptotically approaches 1.0.
                progress(min(1.0, processed / (processed + 100)),
                         desc=f"Retrieved {processed} Ollama instances")

        logger.info(f"Completed Shodan scan, retrieved {len(instances)} Ollama instances")
        return instances

    except shodan.APIError as e:
        error_msg = str(e)
        # Translate the two common, user-fixable failures into ValueError
        # with actionable messages; re-raise everything else untouched.
        if "Invalid API key" in error_msg:
            logger.error("Shodan authentication failed: Invalid API key")
            raise ValueError("Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable.")
        elif "Request rate limit reached" in error_msg:
            logger.error(f"Shodan rate limit exceeded: {e}")
            raise ValueError("Shodan API rate limit exceeded. Please wait before trying again.")
        else:
            logger.error(f"Shodan API error: {e}")
            raise
    except Exception as e:
        logger.error(f"Unhandled exception during Shodan scan: {e}")
        raise
152
 
153
async def check_single_endpoint(session, instance):
    """Probe one Ollama endpoint's /api/tags route and record its models.

    Mutates *instance* in place (fills 'models' on success) and always
    returns it, so the caller's completion loop never breaks on a bad host.

    Args:
        session: shared aiohttp ClientSession.
        instance: dict with at least 'ip' and 'port' keys.

    Returns:
        The same instance dict, updated when the endpoint responded.
    """
    url = f"http://{instance['ip']}:{instance['port']}/api/tags"

    try:
        logger.info(f"Checking Ollama endpoint: {url}")

        # Bound the probe so one dead host cannot stall the whole scan.
        async with session.get(url, timeout=5) as response:
            if response.status != 200:
                logger.warning(f"Failed to get models from {url} - Status: {response.status}")
                return instance
            payload = await response.json()
            found = payload.get('models', [])
            logger.info(f"Found {len(found)} models at {url}")
            instance['models'] = found
            return instance
    except asyncio.TimeoutError:
        logger.warning(f"Timeout connecting to {url}")
        return instance
    except Exception as e:
        logger.error(f"Error checking {url}: {e}")
        return instance
179
 
180
async def check_ollama_endpoints(instances, progress=gr.Progress()):
    """
    Probe every discovered Ollama endpoint concurrently for its model list.

    Args:
        instances: list of instance dicts from the Shodan scan.
        progress: Gradio progress bar updated as probes complete.

    Returns:
        List of the same instance dicts, each updated by
        check_single_endpoint (order follows completion, not input).
    """
    if not instances:
        return []

    progress(0, desc="Checking Ollama endpoints")

    # One shared session for all probes; completion order drives progress.
    async with aiohttp.ClientSession() as session:
        pending = [check_single_endpoint(session, inst) for inst in instances]
        total = len(pending)
        checked = []
        for done, fut in enumerate(asyncio.as_completed(pending), start=1):
            progress(done / total, desc=f"Checking endpoint {done}/{total}")
            checked.append(await fut)
        return checked
 
211
 
212
def update_dataset_with_instances(dataset, instances):
    """
    Update the HuggingFace dataset with new Ollama instances.

    Existing ip:port entries get refreshed metadata (models only replaced
    when the scan actually found some); unseen ip:port entries are added.
    The merged dataset is pushed back to the Hub.

    Args:
        dataset: HuggingFace dataset
        instances: List of Ollama instances with model information

    Returns:
        Updated HuggingFace dataset (or the original when nothing to merge)
    """
    if not instances:
        logger.warning("No instances to update in dataset")
        return dataset

    # Key existing rows by ip:port for O(1) merge lookups
    dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset.to_list()}

    updates_count = 0
    new_count = 0

    for instance in instances:
        instance_key = f"{instance['ip']}:{instance['port']}"

        if instance_key in dataset_dict:
            # Update existing instance
            existing = dataset_dict[instance_key]
            existing['country'] = instance.get('country', existing.get('country'))
            existing['region'] = instance.get('region', existing.get('region'))
            existing['org'] = instance.get('org', existing.get('org'))

            # Only update models if they were found
            if instance.get('models'):
                existing['models'] = instance['models']

            updates_count += 1
        else:
            # Register the new instance in the dict (the previous code kept
            # new rows in a side list, so the same ip:port appearing twice in
            # one batch was inserted twice; keying it here deduplicates).
            dataset_dict[instance_key] = instance
            new_count += 1

    # Dict insertion order keeps existing rows first, new rows appended.
    updated_dataset = datasets.Dataset.from_list(list(dataset_dict.values()))

    # Push updated dataset to HuggingFace Hub
    hf_token = os.getenv("HF_TOKEN")
    updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)

    logger.info(f"Updated {updates_count} existing instances and added {new_count} new instances to dataset")

    return updated_dataset
265
 
266
def get_unique_values(dataset):
    """
    Get unique values for model attributes to populate dropdown filters.

    Reads each model's 'family' and 'parameter_size', preferring the nested
    'details' dict but falling back to attributes stored directly on the
    model object.

    Args:
        dataset: Iterable of instance dicts, each with a 'models' list.

    Returns:
        Dictionary with sorted unique 'families' and 'parameter_sizes'.
    """
    families = set()
    parameter_sizes = set()

    for instance in dataset:
        for model in instance.get('models', []):
            # The previous code defaulted 'details' to {} (a dict), which
            # made the direct-attribute fallback branch unreachable and
            # silently dropped models that carry family/parameter_size at
            # the top level.  Fall back explicitly instead.
            details = model.get('details')
            if not isinstance(details, dict):
                details = {}

            family = details.get('family') or model.get('family')
            parameter_size = details.get('parameter_size') or model.get('parameter_size')

            if family:
                families.add(family)
            if parameter_size:
                parameter_sizes.add(parameter_size)

    return {
        'families': sorted(families),
        'parameter_sizes': sorted(parameter_sizes)
    }
303
 
304
def search_models(dataset, family=None, parameter_size=None, name_search=None, is_admin=False):
    """
    Search for models in the dataset based on filters.

    Args:
        dataset: HuggingFace dataset (or any iterable of instance dicts)
        family: Filter by model family
        parameter_size: Filter by parameter size
        name_search: Filter by model name (case-insensitive substring match)
        is_admin: Whether to include IP and port information

    Returns:
        List of dictionaries with model information; each row also carries
        'full_model_info' (the raw model as pretty-printed JSON) for the
        details view.
    """
    results = []

    for instance in dataset:
        ip = instance.get('ip')
        port = instance.get('port')
        country = instance.get('country')
        region = instance.get('region')
        org = instance.get('org')

        for model in instance.get('models', []):
            model_name = model.get('name', '')

            # Prefer the nested 'details' dict but fall back to top-level
            # attributes.  (The previous default of {} made the fallback
            # unreachable when 'details' was simply missing — same fix as
            # in get_unique_values.)
            details = model.get('details')
            if not isinstance(details, dict):
                details = {}
            model_family = details.get('family') or model.get('family', '')
            model_param_size = details.get('parameter_size') or model.get('parameter_size', '')
            model_quant_level = details.get('quantization_level') or model.get('quantization_level', '')

            model_size_bytes = model.get('size', 0)
            model_size_gb = model_size_bytes / (1024 * 1024 * 1024) if model_size_bytes else 0

            # Apply filters (falsy filter values mean "no filter")
            if family and model_family != family:
                continue
            if parameter_size and model_param_size != parameter_size:
                continue
            if name_search and name_search.lower() not in model_name.lower():
                continue

            result = {
                'name': model_name,
                'family': model_family,
                'parameter_size': model_param_size,
                'quantization_level': model_quant_level,
                'size_gb': round(model_size_gb, 2),
                'country': country,
                'region': region,
                'org': org,
            }

            # Include full model info for details view
            result['full_model_info'] = json.dumps(model, indent=2)

            # Include IP and port for admin users only
            if is_admin:
                result['ip'] = ip
                result['port'] = port

            results.append(result)

    return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
 
379
def create_interface():
    """Create the Gradio interface for the application.

    Loads the dataset and filter values once at startup, builds a two-tab
    Blocks UI (model browser + Shodan scan trigger), and wires the event
    handlers.  The scan handler rebinds the closed-over ``dataset`` and
    ``unique_values`` via ``nonlocal`` so later searches see fresh data.

    Returns:
        The constructed gr.Blocks interface.

    Raises:
        Exception: any build failure is logged and re-raised.
    """
    try:
        # Load dataset once at startup
        dataset = load_or_create_dataset()

        # Get unique values for dropdowns once at startup
        unique_values = get_unique_values(dataset)

        # Get all models to display on initial load
        initial_results = search_models(dataset)

        # Create Gradio interface
        with gr.Blocks(title="Ollama Instance Scanner") as interface:
            gr.Markdown("# Ollama Instance Scanner")
            gr.Markdown("Browse publicly accessible Ollama instances and their models")

            with gr.Tabs() as tabs:
                # Browse Models Tab
                with gr.TabItem("Browse Models"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            family_dropdown = gr.Dropdown(
                                choices=["All"] + unique_values['families'],
                                value="All",
                                label="Filter by Family"
                            )
                            parameter_size_dropdown = gr.Dropdown(
                                choices=["All"] + unique_values['parameter_sizes'],
                                value="All",
                                label="Filter by Parameter Size"
                            )
                            name_search = gr.Textbox(
                                label="Search by Name",
                                placeholder="Enter model name..."
                            )
                            search_button = gr.Button("Search")

                    with gr.Row():
                        results_table = gr.DataFrame(
                            value=initial_results,
                            headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
                            label="Search Results"
                        )

                    with gr.Row():
                        model_details = gr.JSON(label="Model Details")

                # Shodan Scan Tab (Admin only)
                with gr.TabItem("Shodan Scan (Admin Only)"):
                    gr.Markdown("## Shodan Scan")
                    gr.Markdown("This tab allows scanning for Ollama instances using Shodan. You need a valid Shodan API key set as an environment variable.")

                    shodan_scan_button = gr.Button("Start Shodan Scan")
                    scan_status = gr.Textbox(label="Scan Status", interactive=False)

                # Define event handlers
                def on_search_click(family, parameter_size, name_search):
                    # Use "All" as a signal not to filter
                    family_filter = None if family == "All" else family
                    param_size_filter = None if parameter_size == "All" else parameter_size
                    name_filter = None if not name_search else name_search

                    # Check if admin mode is enabled (would need to implement proper authentication)
                    is_admin = False  # This should be based on proper authentication

                    # Search for models
                    results = search_models(dataset, family_filter, param_size_filter, name_filter, is_admin)

                    # Return results
                    return results

                def on_table_select(evt: gr.SelectData, results):
                    # NOTE(review): `results` arrives as the DataFrame
                    # component's value (typically a pandas DataFrame), not
                    # the list of dicts search_models returned; `results[i]`
                    # and `.get('full_model_info')` likely fail here —
                    # confirm against the installed Gradio version.
                    if evt.index[0] < len(results):
                        selected_row = results[evt.index[0]]
                        # NOTE(review): full_model_info is a JSON *string*;
                        # gr.JSON may render it as a quoted string — confirm.
                        return selected_row.get('full_model_info', {})
                    return {}

                async def run_shodan_scan():
                    try:
                        # Verify Shodan API Key exists
                        if not os.getenv("SHODAN_API_KEY"):
                            return "Error: SHODAN_API_KEY environment variable is not set."

                        # Perform Shodan scan
                        instances = scan_shodan()

                        if not instances:
                            return "No Ollama instances found in Shodan scan."

                        # Check Ollama endpoints
                        updated_instances = await check_ollama_endpoints(instances)

                        # Update dataset
                        nonlocal dataset
                        dataset = update_dataset_with_instances(dataset, updated_instances)

                        # Update unique values
                        nonlocal unique_values
                        unique_values = get_unique_values(dataset)

                        # Update dropdown choices
                        # NOTE(review): mutating `.choices` on a live Gradio
                        # component does not refresh the rendered UI; the
                        # idiomatic fix is returning gr.update(choices=...)
                        # from the handler — confirm before relying on this.
                        family_dropdown.choices = ["All"] + unique_values['families']
                        parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']

                        return f"Scan completed successfully. Found {len(instances)} instances, {sum(1 for i in updated_instances if i.get('models'))} with accessible models."
                    except Exception as e:
                        logger.error(f"Error in Shodan scan: {e}")
                        return f"Error: {str(e)}"

                # Connect event handlers
                search_button.click(
                    on_search_click,
                    inputs=[family_dropdown, parameter_size_dropdown, name_search],
                    outputs=[results_table]
                )

                results_table.select(
                    on_table_select,
                    inputs=[results_table],
                    outputs=[model_details]
                )

                shodan_scan_button.click(
                    run_shodan_scan,
                    inputs=[],
                    outputs=[scan_status]
                )

        return interface

    except Exception as e:
        logger.error(f"Failed to create Gradio interface: {e}")
        raise
513
 
514
def main():
    """Main function to run the application."""
    try:
        app = create_interface()
        if not app:
            logger.error("Failed to create interface")
            return
        app.launch()
    except Exception as e:
        logger.error(f"Application failed: {e}")
 
525
  if __name__ == "__main__":
526
  main()