latterworks committed on
Commit
4cda29c
·
verified ·
1 Parent(s): 2d96a84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +329 -440
app.py CHANGED
@@ -1,532 +1,421 @@
1
  import os
2
  import logging
3
  import asyncio
4
- import bcrypt
5
- import requests
6
- import shodan
7
  import gradio as gr
8
- from typing import List, Dict, Any, Tuple, Optional
9
- from datasets import load_dataset, Dataset
10
- from huggingface_hub import HfApi, create_repo
11
 
12
- # Setup logging
13
# Configure process-wide logging once at import time, then grab this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
18
 
19
- # Environment variable validation
20
# Warn (without aborting) about every expected environment variable that is
# missing or empty, so misconfiguration is visible in the logs at startup.
required_env_vars = ["SHODAN_API_KEY", "ADMIN_PASSWORD", "SHODAN_QUERY"]
for var in required_env_vars:
    if os.environ.get(var):
        continue
    logger.warning(f"Environment variable {var} is not set")
24
 
25
- # Dataset functions
26
def get_or_create_dataset(dataset_name: str = "latterworks/llama_checker_results") -> Optional[Dataset]:
    """
    Load the results dataset from the Hugging Face Hub, creating it if loading fails.

    Args:
        dataset_name: Repository id of the dataset on the Hugging Face Hub.

    Returns:
        The "train" split when present, otherwise the first available split.
        If loading fails, a freshly created and pushed empty dataset.
        None when both loading and creation fail.
    """
    try:
        # HF_TOKEN is read from the environment and passed through as-is.
        token = os.environ.get("HF_TOKEN")

        # Phase 1: try to load what is already on the Hub.
        try:
            splits = load_dataset(dataset_name, token=token)
            if "train" in splits:
                return splits["train"]
            # No "train" split: fall back to whichever split comes first.
            return splits[next(iter(splits))]
        except Exception as e:
            logger.error(f"Failed to load dataset {dataset_name}: {e}")

        # Phase 2: loading failed, so create the repository and seed it with
        # an empty table that has the expected columns.
        try:
            create_repo(repo_id=dataset_name, repo_type="dataset", token=token)
            empty_dataset = Dataset.from_dict({
                "ip": [],
                "port": [],
                "country": [],
                "region": [],
                "org": [],
                "models": [],
            })
            empty_dataset.push_to_hub(dataset_name, token=token)
            return empty_dataset
        except Exception as create_e:
            logger.error(f"Failed to create dataset: {create_e}")
            return None
    except Exception as e:
        logger.exception(f"Unexpected error in get_or_create_dataset: {e}")
        return None
 
 
 
77
 
78
def update_dataset(dataset: Dataset, new_entries: List[Dict[str, Any]]) -> Optional[Dataset]:
    """
    Merge scan results into the dataset and push the result to the Hub.

    An entry whose (ip, port) pair already exists overwrites that row's
    country/region/org/models (only for the keys the entry actually carries);
    any other entry is appended as a new row.

    Args:
        dataset: The dataset to update.
        new_entries: Entries to add or merge.

    Returns:
        The updated dataset, or None when `dataset` is None.
    """
    if dataset is None:
        logger.error("Cannot update None dataset")
        return None

    columns = dataset.to_dict()
    mutable_fields = ("country", "region", "org", "models")

    # Index rows by (ip, port) once instead of rescanning every row for every
    # entry. setdefault keeps the FIRST row for a duplicated pair, which is the
    # row a linear scan would have matched.
    row_of = {}
    for position, key in enumerate(zip(columns["ip"], columns["port"])):
        row_of.setdefault(key, position)

    for entry in new_entries:
        position = row_of.get((entry.get("ip"), entry.get("port")))

        if position is not None:
            for field in mutable_fields:
                columns[field][position] = entry.get(field, columns[field][position])
            continue

        # New row: missing keys fall back to empty defaults.
        stored_ip = entry.get("ip", "")
        stored_port = entry.get("port", 0)
        columns["ip"].append(stored_ip)
        columns["port"].append(stored_port)
        columns["country"].append(entry.get("country", ""))
        columns["region"].append(entry.get("region", ""))
        columns["org"].append(entry.get("org", ""))
        columns["models"].append(entry.get("models", []))
        # Later entries in the same batch must see the row just appended.
        row_of.setdefault((stored_ip, stored_port), len(columns["ip"]) - 1)

    updated_dataset = Dataset.from_dict(columns)

    token = os.environ.get("HF_TOKEN")
    updated_dataset.push_to_hub("latterworks/llama_checker_results", token=token)

    return updated_dataset
 
 
 
 
 
 
130
 
131
- # Ollama endpoint checking
132
async def check_ollama_endpoint(ip: str, port: int) -> Dict[str, Any]:
    """
    Query one Ollama instance's /api/tags endpoint and summarise its models.

    The HTTP request is made with the blocking `requests` library, so it is
    run in the event loop's default executor. Calling it directly inside the
    coroutine would stall every other task for up to the 5 second timeout.

    Args:
        ip: The IP address of the Ollama instance.
        port: The port of the Ollama instance.

    Returns:
        A dictionary with "ip", "port", "models" (list of flattened model
        records) and "status" ("success", "connection failed",
        "invalid json" or "unexpected error").
    """
    url = f"http://{ip}:{port}/api/tags"
    models = []
    status = "success"

    try:
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=5)
        )
        response.raise_for_status()

        data = response.json()
        if "models" in data:
            for model_data in data["models"]:
                # "details" may be missing or null; treat both as empty.
                details = model_data.get("details") or {}
                models.append({
                    "name": model_data.get("name", ""),
                    "family": details.get("family", ""),
                    "parameter_size": details.get("parameter_size", ""),
                    "quantization_level": details.get("quantization_level", ""),
                    "digest": model_data.get("digest", ""),
                    "modified_at": model_data.get("modified_at", ""),
                    "size": model_data.get("size", 0),
                })
    except requests.exceptions.RequestException as e:
        logger.error(f"Network error when checking {ip}:{port}: {e}")
        status = "connection failed"
    except ValueError as e:
        logger.error(f"Invalid JSON from {ip}:{port}: {e}")
        status = "invalid json"
    except Exception:
        logger.exception(f"Unexpected error when checking {ip}:{port}")
        status = "unexpected error"

    return {
        "ip": ip,
        "port": port,
        "models": models,
        "status": status,
    }
181
 
182
async def check_ollama_endpoints(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Probe every entry's Ollama endpoint, scheduling all probes up front.

    Args:
        entries: Dictionaries that each carry at least "ip" and "port".

    Returns:
        One dictionary per entry, in input order. On success it is a copy of
        the entry with "models" and "status" taken from the probe result; on
        failure it is the original entry itself, with "models" set to [] and
        "status" set to "error".
    """
    # Schedule every probe before awaiting any of them.
    scheduled = [
        (entry, asyncio.create_task(check_ollama_endpoint(entry["ip"], entry["port"])))
        for entry in entries
    ]

    results = []
    for entry, task in scheduled:
        try:
            outcome = await task
            # Keep the location fields already on the entry (country, region, org).
            merged = {**entry, "models": outcome["models"], "status": outcome["status"]}
            results.append(merged)
        except Exception as e:
            logger.error(f"Error checking endpoint {entry.get('ip')}:{entry.get('port')}: {e}")
            entry["models"] = []
            entry["status"] = "error"
            results.append(entry)

    return results
214
-
215
- # Shodan scanning
216
def run_shodan_scan() -> List[Dict[str, Any]]:
    """
    Search Shodan for Ollama instances.

    The API key comes from SHODAN_API_KEY and the query from SHODAN_QUERY
    (default "product:Ollama port:11434").

    Returns:
        One dictionary per match with ip, port, country, region, org and an
        empty "models" list; an empty list if the key is missing or the
        search fails.
    """
    api_key = os.environ.get("SHODAN_API_KEY")
    if not api_key:
        logger.error("SHODAN_API_KEY environment variable not set")
        return []

    query = os.environ.get("SHODAN_QUERY", "product:Ollama port:11434")

    try:
        client = shodan.Shodan(api_key)
        found = client.search(query, limit=1000)

        entries = []
        for match in found["matches"]:
            location = match.get("location", {})
            entries.append({
                "ip": match.get("ip_str", ""),
                "port": match.get("port", 0),
                "country": location.get("country_name", ""),
                "region": location.get("region_name", ""),
                "org": match.get("org", ""),
                "models": [],
            })
        return entries
    except shodan.APIError as e:
        logger.error(f"Shodan API error: {e}")
        return []
    except Exception:
        logger.exception("Unexpected error in run_shodan_scan")
        return []
253
-
254
- # Password validation
255
def validate_admin_password(password: str) -> bool:
    """
    Check an entered password against the ADMIN_PASSWORD environment variable.

    ADMIN_PASSWORD may hold either a bcrypt hash (recognised by its "$2b"
    prefix) or a plaintext password. Plaintext values are compared in
    constant time so response timing does not reveal how many leading
    characters matched.

    Args:
        password: The entered password to validate.

    Returns:
        True if the password is valid. False if it is wrong, missing, or
        ADMIN_PASSWORD is unset or is a malformed bcrypt hash.
    """
    import hmac  # local import: only this function needs it

    stored_password = os.environ.get("ADMIN_PASSWORD")
    if not stored_password:
        logger.error("ADMIN_PASSWORD environment variable not set")
        return False

    if password is None:
        return False

    candidate = password.encode('utf-8')
    expected = stored_password.encode('utf-8')

    # If the stored password starts with '$2b', it's a bcrypt hash
    if stored_password.startswith('$2b'):
        try:
            return bcrypt.checkpw(candidate, expected)
        except ValueError:
            # A malformed hash must deny access rather than crash the handler.
            logger.error("ADMIN_PASSWORD looks like a bcrypt hash but is malformed")
            return False

    # Plaintext fallback; bytes are compared so non-ASCII passwords work too.
    return hmac.compare_digest(candidate, expected)
 
 
 
 
 
 
 
 
276
 
277
- # Gradio UI functions
278
def get_model_families_and_sizes(dataset: Dataset) -> Tuple[List[str], List[str]]:
    """
    Collect the distinct model families and parameter sizes in the dataset.

    Args:
        dataset: Rows that each carry a "models" list of model dictionaries.

    Returns:
        Tuple of (families, parameter_sizes), each sorted ascending. Both are
        empty when `dataset` is None.
    """
    if dataset is None:
        return [], []

    families = set()
    parameter_sizes = set()

    for row in dataset:
        # A missing or empty model list contributes nothing.
        for model in row["models"] or ():
            family = model.get("family")
            param_size = model.get("parameter_size")
            if family:
                families.add(family)
            if param_size:
                parameter_sizes.add(param_size)

    return sorted(families), sorted(parameter_sizes)
 
 
 
307
 
308
def search_models(family: str, parameter_size: str, name: str, dataset: Dataset, is_admin: bool) -> Tuple[List[Dict], Dict]:
    """
    Return every model in the dataset that passes the given filters.

    Args:
        family: Filter by model family (exact match; empty disables it).
        parameter_size: Filter by parameter size (exact match; empty disables it).
        name: Filter by model name (case-insensitive substring; empty disables it).
        dataset: The dataset to search in.
        is_admin: Whether the user is an admin. Admin results also carry the
            "source_ip" and "source_port" of the hosting instance.

    Returns:
        Tuple of (filtered_models, empty_details). Each model is a copy of the
        stored record with an added "size_gb" field.
    """
    if dataset is None:
        return [], {}

    all_models = []
    for row in dataset:
        for model in row["models"] or ():
            model_copy = model.copy()

            if is_admin:
                model_copy["source_ip"] = row["ip"]
                model_copy["source_port"] = row["port"]

            # "size" may be absent or stored as null; both count as zero
            # instead of raising on the division.
            size_bytes = model.get("size")
            model_copy["size_gb"] = round(size_bytes / (1024**3), 2) if size_bytes else 0

            all_models.append(model_copy)

    filtered_models = all_models
    if family:
        filtered_models = [m for m in filtered_models if m.get("family") == family]
    if parameter_size:
        filtered_models = [m for m in filtered_models if m.get("parameter_size") == parameter_size]
    if name:
        needle = name.lower()
        # `or ""` guards records whose name is stored as null.
        filtered_models = [m for m in filtered_models if needle in (m.get("name") or "").lower()]

    return filtered_models, {}
359
 
360
def select_model(evt: gr.SelectData, models: List[Dict]) -> Dict:
    """
    Return the details of the table row the user clicked.

    Args:
        evt: The selection event. For a Dataframe its `index` is a
            [row, column] pair; a bare integer is accepted as well.
        models: The table contents, either a list of records or the pandas
            DataFrame that Gradio passes when a Dataframe component is the
            input.

    Returns:
        The selected row as a dictionary, or {} when nothing valid is selected.
    """
    if models is None:
        return {}

    index = evt.index
    row = index[0] if isinstance(index, (list, tuple)) else index
    if not isinstance(row, int) or not 0 <= row < len(models):
        return {}

    if hasattr(models, "iloc"):
        # pandas DataFrame: position-based lookup, then unwrap numpy scalars
        # so the result is JSON-serialisable.
        record = models.iloc[row].to_dict()
        return {key: (value.item() if hasattr(value, "item") else value)
                for key, value in record.items()}

    return models[row]
375
 
376
async def scan_worker() -> str:
    """
    Run the full pipeline: Shodan scan, endpoint checks, dataset update.

    Returns:
        A human-readable status message describing the outcome.
    """
    entries = run_shodan_scan()
    if not entries:
        return "No Ollama instances found or scan failed"

    updated_entries = await check_ollama_endpoints(entries)

    dataset = get_or_create_dataset()
    if dataset is None:
        return "Scan completed but failed to update dataset"

    update_dataset(dataset, updated_entries)
    return f"Scan completed. Found {len(entries)} Ollama instances."
398
-
399
- # Main application
400
def create_app():
    """
    Build the Gradio application.

    The UI has an admin login row, an admin-only "Shodan Scan" tab that is
    hidden until login succeeds, and a public "Browse Models" tab.

    Returns:
        The configured `gr.Blocks` application (not yet launched).
    """
    dataset = get_or_create_dataset()

    families, parameter_sizes = [], []
    if dataset is not None:
        families, parameter_sizes = get_model_families_and_sizes(dataset)

    with gr.Blocks(title="Ollama Instance Explorer") as app:
        # Admin login section
        with gr.Row():
            admin_password = gr.Textbox(
                label="Admin Password",
                type="password",
                placeholder="Enter admin password"
            )
            login_button = gr.Button("Login")
            login_status = gr.Textbox(
                label="Login Status",
                value="",
                interactive=False
            )

        # Per-session flag flipped by a successful login.
        is_admin = gr.State(False)

        # Admin-only section
        with gr.Tab("Shodan Scan", visible=False) as admin_tab:
            scan_button = gr.Button("Start Scan")
            scan_status = gr.Textbox(
                label="Scan Status",
                value="",
                interactive=False
            )

            def on_scan_click():
                nonlocal dataset
                # The handler is synchronous, so drive the coroutine with asyncio.run.
                try:
                    message = asyncio.run(scan_worker())
                except Exception as e:
                    logger.exception("Error during scan")
                    return f"Error during scan: {str(e)}"
                # Reload so later searches see the scan results instead of the
                # snapshot taken when the app was built.
                refreshed = get_or_create_dataset()
                if refreshed is not None:
                    dataset = refreshed
                return message

            scan_button.click(
                on_scan_click,
                inputs=[],
                outputs=[scan_status]
            )

        # Public section
        with gr.Tab("Browse Models"):
            with gr.Row():
                family_filter = gr.Dropdown(
                    label="Family",
                    choices=[""] + families,
                    value=""
                )
                parameter_size_filter = gr.Dropdown(
                    label="Parameter Size",
                    choices=[""] + parameter_sizes,
                    value=""
                )
                name_filter = gr.Textbox(
                    label="Name Search",
                    placeholder="Enter model name to search"
                )

            search_button = gr.Button("Search")

            with gr.Row():
                models_table = gr.DataFrame(
                    headers=["name", "family", "parameter_size", "quantization_level", "size_gb"],
                    datatype=["str", "str", "str", "str", "number"],
                    interactive=False
                )
                model_details = gr.JSON(label="Model Details")

            def on_search(family, parameter_size, name, admin_status):
                models, _ = search_models(family, parameter_size, name, dataset, admin_status)

                # The Dataframe component takes a list of rows (lists) in
                # header order, not a list of dictionaries.
                rows = [
                    [
                        model.get("name", ""),
                        model.get("family", ""),
                        model.get("parameter_size", ""),
                        model.get("quantization_level", ""),
                        model.get("size_gb", 0),
                    ]
                    for model in models
                ]
                return rows, {}

            search_button.click(
                on_search,
                inputs=[family_filter, parameter_size_filter, name_filter, is_admin],
                outputs=[models_table, model_details]
            )

            models_table.select(
                select_model,
                inputs=[models_table],
                outputs=[model_details]
            )

        # Handle login
        def on_login(password):
            if validate_admin_password(password):
                return True, gr.update(visible=True), "Login successful"
            return False, gr.update(visible=False), "Invalid password"

        login_button.click(
            on_login,
            inputs=[admin_password],
            outputs=[is_admin, admin_tab, login_status]
        )

        # Populate the table with an unfiltered, non-admin search on page load.
        app.load(
            lambda: on_search("", "", "", False),
            inputs=None,
            outputs=[models_table, model_details]
        )

    return app


# Run the app
if __name__ == "__main__":
    app = create_app()
    app.launch()
 
1
  import os
2
  import logging
3
  import asyncio
4
+ import time
5
+ from typing import Dict, List, Optional, Any, Tuple
6
+
7
  import gradio as gr
8
+ import datasets
9
+ import shodan
10
+ import requests
11
 
12
+ # Set up logging
13
# Configure process-wide logging once at import time, then grab this module's logger.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
18
 
 
 
 
 
 
19
 
20
def load_or_create_dataset():
    """
    Load the results dataset from the Hugging Face Hub, creating it if absent.

    Returns:
        The "train" split of the "latterworks/llama_checker_results" dataset.

    Raises:
        ValueError: If the HF_TOKEN environment variable is not set.
        Exception: Any other load failure is logged and re-raised so the
            application stops instead of running without data.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN environment variable is not set")

    repo_id = "latterworks/llama_checker_results"

    try:
        # `token=` replaces the deprecated `use_auth_token=` keyword.
        dataset = datasets.load_dataset(repo_id, token=hf_token)['train']
    except FileNotFoundError:
        # Dataset doesn't exist yet: seed an empty table with the expected
        # columns, push it, then load it back from the Hub.
        dataset = datasets.Dataset.from_dict(
            {"ip": [], "port": [], "country": [], "region": [], "org": [], "models": []}
        )
        dataset.push_to_hub(repo_id, token=hf_token)
        dataset = datasets.load_dataset(repo_id, token=hf_token)['train']
    except Exception as e:
        # Use the module logger so the record carries this module's name.
        logger.error(f"Failed to load or create dataset: {e}")
        raise  # Re-raise the exception to stop the application

    return dataset
45
+
46
 
47
def scan_shodan(progress=gr.Progress()) -> List[Dict]:
    """
    Query Shodan for Ollama instances.

    The API key is read from SHODAN_API_KEY and the query from SHODAN_QUERY
    (default "product:Ollama port:11434").

    Args:
        progress: Gradio progress tracker, updated once per processed match.

    Returns:
        One dictionary per match with 'ip', 'port', 'country', 'region',
        'org' and an empty 'models' list.

    Raises:
        ValueError: If SHODAN_API_KEY is not set.
        shodan.APIError: If the Shodan API rejects the request (logged, re-raised).
        Exception: Any other failure while processing (logged, re-raised).
    """
    shodan_api_key = os.getenv("SHODAN_API_KEY")
    if not shodan_api_key:
        raise ValueError("SHODAN_API_KEY environment variable is not set")

    shodan_query = os.getenv("SHODAN_QUERY", "product:Ollama port:11434")
    api = shodan.Shodan(shodan_api_key)

    try:
        logger.info(f"Searching Shodan with query: {shodan_query}")
        results = api.search(shodan_query)

        total_results = results['total']
        logger.info(f"Found {total_results} results")

        progress(0, desc="Scanning Shodan for Ollama instances")

        matches = results['matches']
        match_count = len(matches)
        instances = []
        for position, match in enumerate(matches, start=1):
            progress(position / match_count, desc=f"Processing result {position}/{match_count}")

            location = match.get('location', {})
            instances.append({
                'ip': match['ip_str'],
                'port': match.get('port', 11434),
                'country': location.get('country_name'),
                'region': location.get('region_name'),
                'org': match.get('org'),
                'models': [],
            })

        return instances

    except shodan.APIError as e:
        logger.error(f"Shodan API error: {e}")
        raise
    except Exception as e:
        logger.error(f"Error during Shodan scan: {e}")
        raise
102
+
103
 
104
async def check_single_endpoint(ip: str, port: int) -> Optional[List[Dict]]:
    """
    Check a single Ollama endpoint for available models.

    The blocking `requests` call runs in the event loop's default executor so
    a slow host does not stall every other coroutine for the 5 second timeout.

    Each model record is flattened to the keys the rest of this file reads:
    name, family, parameter_size, quantization_level, digest, modified_at and
    size. Ollama reports family, parameter_size and quantization_level under
    a nested "details" object, so returning the raw record would leave the
    family and parameter-size filters with nothing to match.

    Args:
        ip: IP address of the endpoint.
        port: Port number of the endpoint.

    Returns:
        List of flattened model records if the endpoint answered with HTTP
        200, None otherwise (non-200 status, connection error, bad payload).
    """
    url = f"http://{ip}:{port}/api/tags"

    try:
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None, lambda: requests.get(url, timeout=5)
        )
        if response.status_code != 200:
            logger.warning(f"Failed to get models from {ip}:{port}, status code: {response.status_code}")
            return None

        data = response.json()
        models = []
        for raw in data.get('models') or []:
            details = raw.get('details') or {}
            models.append({
                'name': raw.get('name', ''),
                # Prefer the nested value, but accept an already-flat record.
                'family': details.get('family', raw.get('family', '')),
                'parameter_size': details.get('parameter_size', raw.get('parameter_size', '')),
                'quantization_level': details.get('quantization_level', raw.get('quantization_level', '')),
                'digest': raw.get('digest', ''),
                'modified_at': raw.get('modified_at', ''),
                'size': raw.get('size', 0),
            })
        return models
    except requests.exceptions.RequestException as e:
        logger.warning(f"Error connecting to {ip}:{port}: {e}")
        return None
    except Exception as e:
        logger.warning(f"Unexpected error checking {ip}:{port}: {e}")
        return None
 
 
 
 
 
 
 
132
 
133
+
134
async def check_ollama_endpoints(
    instances: List[Dict],
    dataset,
    progress=gr.Progress(),
    max_concurrency: int = 16,
) -> datasets.Dataset:
    """
    Check all Ollama endpoints concurrently and merge the results into the dataset.

    Endpoints are probed in parallel, bounded by `max_concurrency`, rather
    than awaited one after another. Results are merged in input order, so
    when two instances share the same "ip:port" the later one wins.

    Args:
        instances: Dictionaries describing Ollama instances; each needs
            'ip' and 'port'. An instance that reports models has its
            'models' key updated in place.
        dataset: Existing rows, iterated as dictionaries with 'ip' and 'port'.
        progress: Gradio progress tracker, advanced as probes finish.
        max_concurrency: Upper bound on simultaneous probes (minimum 1).

    Returns:
        The updated dataset, which is also pushed to
        "latterworks/llama_checker_results".

    Raises:
        ValueError: If the HF_TOKEN environment variable is not set.
    """
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN environment variable is not set")

    # Key existing rows by "ip:port" so scan results can replace them.
    dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset}

    progress(0, desc="Checking Ollama endpoints")

    total = len(instances)
    finished = 0
    limiter = asyncio.Semaphore(max(1, max_concurrency))

    async def probe(instance):
        nonlocal finished
        async with limiter:
            models = await check_single_endpoint(instance['ip'], instance['port'])
        finished += 1
        progress(finished / total, desc=f"Checking endpoint {finished}/{total}")
        return models

    # gather() returns results in input order regardless of completion order.
    outcomes = await asyncio.gather(*(probe(instance) for instance in instances))

    for instance, models in zip(instances, outcomes):
        # Only instances that returned at least one model are stored.
        if models:
            instance['models'] = models
            dataset_dict[f"{instance['ip']}:{instance['port']}"] = instance

    rows = list(dataset_dict.values())
    # `or` (not a .get default) because Shodan fields may be present but None.
    updated_dataset = datasets.Dataset.from_dict({
        "ip": [item['ip'] for item in rows],
        "port": [item['port'] for item in rows],
        "country": [item.get('country') or '' for item in rows],
        "region": [item.get('region') or '' for item in rows],
        "org": [item.get('org') or '' for item in rows],
        "models": [item.get('models') or [] for item in rows],
    })

    updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)

    return updated_dataset
187
+
188
 
189
def get_unique_values(dataset) -> Tuple[List[str], List[str], List[str]]:
    """
    Get unique values for family, parameter_size, and name.

    Rows whose 'models' entry is missing or None are skipped rather than
    raising, and empty/None field values are ignored.

    Args:
        dataset: Iterable of row dictionaries, each optionally carrying a
            'models' list of model dictionaries.

    Returns:
        Tuple (families, parameter_sizes, names). Each list is sorted;
        families and parameter_sizes start with an extra '' entry that
        serves as the "no filter" dropdown choice.
    """
    families = set()
    parameter_sizes = set()
    names = set()

    for item in dataset:
        # `or []` covers both an absent key and a stored null.
        for model in item.get('models') or []:
            if model.get('family'):
                families.add(model['family'])
            if model.get('parameter_size'):
                parameter_sizes.add(model['parameter_size'])
            if model.get('name'):
                names.add(model['name'])

    return (
        [''] + sorted(families),
        [''] + sorted(parameter_sizes),
        sorted(names),
    )
218
 
219
+
220
def search_models(
    dataset,
    family: str = "",
    parameter_size: str = "",
    name: str = "",
    is_admin: bool = False
) -> Tuple[List[Dict], List[Dict]]:
    """
    Search models based on criteria.

    Null values stored in the dataset (a row without models, a model without
    a size or name) are treated as empty instead of raising.

    Args:
        dataset: Iterable of row dictionaries with 'ip', 'port' and 'models'.
        family: Filter by model family (exact match; empty disables it).
        parameter_size: Filter by parameter size (exact match; empty disables it).
        name: Filter by model name (case-insensitive substring; empty disables it).
        is_admin: Whether the user is an admin. Admin results also include
            the hosting instance's 'ip' and 'port'.

    Returns:
        Tuple of (results, selected model info). `results` holds one
        dictionary per matching model with 'size' expressed in GiB rounded to
        two decimals. The second element is always `[{}]`, meaning no model
        is selected yet.
    """
    needle = name.lower() if name else ""
    results = []

    for item in dataset:
        for model in item.get('models') or []:
            model_family = model.get('family') or ''
            model_param_size = model.get('parameter_size') or ''
            model_name = model.get('name') or ''

            if family and model_family != family:
                continue
            if parameter_size and model_param_size != parameter_size:
                continue
            if needle and needle not in model_name.lower():
                continue

            result = {
                'name': model_name,
                'family': model_family,
                'parameter_size': model_param_size,
                'quantization_level': model.get('quantization_level') or '',
                # Bytes -> GiB; a missing/null size counts as 0.
                'size': round((model.get('size') or 0) / (1024**3), 2),
            }

            # Host details are exposed to admin users only.
            if is_admin:
                result['ip'] = item['ip']
                result['port'] = item['port']

            results.append(result)

    selected_model_info = [{}]

    return results, selected_model_info
272
+
 
 
 
 
273
 
274
def get_model_info(model_row: Dict) -> Dict:
    """
    Return the detail record for a selected model.

    The selected row already holds every detail, so it is handed back
    unchanged (the very same object, not a copy).

    Args:
        model_row: Selected model row from the results.

    Returns:
        The `model_row` argument itself.
    """
    details = model_row
    return details
 
 
 
285
 
286
+
287
def create_interface():
    """
    Create the Gradio interface for the application.

    The interface has a "Browse Models" tab (filters, results table, detail
    panel) and a "Shodan Scan" tab that offers a scan button only when
    SHODAN_API_KEY is configured. Admin mode (ADMIN_MODE=true) adds the host
    'ip' and 'port' columns to the results table.

    Returns:
        The configured `gr.Blocks` interface (not yet launched).
    """
    dataset = load_or_create_dataset()

    is_admin = os.getenv("ADMIN_MODE", "false").lower() == "true"

    # The unique model names are not used by any widget.
    families, parameter_sizes, _ = get_unique_values(dataset)

    initial_results, initial_model_info = search_models(dataset, is_admin=is_admin)

    table_headers = ["name", "family", "parameter_size", "quantization_level", "size"]
    if is_admin:
        table_headers += ["ip", "port"]

    def to_rows(results):
        # The Dataframe component takes a list of rows (lists) in header
        # order, not a list of dictionaries.
        return [[record.get(column, "") for column in table_headers] for record in results]

    def first_or_empty(model_info):
        # search_models reports "nothing selected" as [{}]; the JSON panel
        # should show the dictionary, not the wrapping list.
        return model_info[0] if model_info else {}

    def run_shodan_scan(progress=gr.Progress()):
        nonlocal dataset
        instances = scan_shodan(progress)
        dataset = asyncio.run(check_ollama_endpoints(instances, dataset, progress))

        updated_families, updated_parameter_sizes, _ = get_unique_values(dataset)
        updated_results, updated_model_info = search_models(dataset, is_admin=is_admin)

        return (
            # Replace the dropdown CHOICES. Returning the bare list would be
            # interpreted as the dropdown's selected value.
            gr.update(choices=updated_families, value=""),
            gr.update(choices=updated_parameter_sizes, value=""),
            to_rows(updated_results),
            first_or_empty(updated_model_info),
            f"Scan complete: checked {len(instances)} instance(s).",
        )

    def run_search(family, parameter_size, name):
        results, model_info = search_models(dataset, family, parameter_size, name, is_admin=is_admin)
        return to_rows(results), first_or_empty(model_info)

    def select_model(evt: gr.SelectData, results):
        # For a Dataframe, evt.index is a [row, column] pair.
        index = evt.index
        row = index[0] if isinstance(index, (list, tuple)) else index
        if results is None or not isinstance(row, int) or not 0 <= row < len(results):
            return {}
        if hasattr(results, "iloc"):
            # Gradio hands the table over as a pandas DataFrame: look the row
            # up by position and unwrap numpy scalars for JSON output.
            record = results.iloc[row].to_dict()
            return {key: (value.item() if hasattr(value, "item") else value)
                    for key, value in record.items()}
        selected = results[row]
        if isinstance(selected, (list, tuple)):
            return dict(zip(table_headers, selected))
        return selected

    with gr.Blocks(title="Ollama Instance Scanner") as interface:
        gr.Markdown("# Ollama Instance Scanner")

        with gr.Tabs():
            # Browse Models tab
            with gr.TabItem("Browse Models"):
                with gr.Row():
                    with gr.Column():
                        family_dropdown = gr.Dropdown(
                            choices=families,
                            label="Model Family",
                            value=""
                        )
                        parameter_size_dropdown = gr.Dropdown(
                            choices=parameter_sizes,
                            label="Parameter Size",
                            value=""
                        )
                        name_search = gr.Textbox(
                            label="Model Name",
                            placeholder="Search by name..."
                        )
                        search_button = gr.Button("Search")

                results_df = gr.DataFrame(
                    value=to_rows(initial_results),
                    label="Search Results",
                    headers=table_headers,
                    row_count=10,
                    interactive=False
                )

                model_info = gr.JSON(
                    value=first_or_empty(initial_model_info),
                    label="Model Details"
                )

                # Event handlers
                search_button.click(
                    fn=run_search,
                    inputs=[family_dropdown, parameter_size_dropdown, name_search],
                    outputs=[results_df, model_info]
                )

                results_df.select(
                    fn=select_model,
                    inputs=[results_df],
                    outputs=[model_info]
                )

            # Shodan Scan tab
            with gr.TabItem("Shodan Scan"):
                shodan_api_key = os.getenv("SHODAN_API_KEY")

                if shodan_api_key:
                    scan_button = gr.Button("Start Scan")
                    scan_output = gr.Markdown("Press the button to start scanning Shodan for Ollama instances.")

                    # Event handlers
                    scan_button.click(
                        fn=run_shodan_scan,
                        outputs=[
                            family_dropdown, parameter_size_dropdown,
                            results_df, model_info, scan_output
                        ]
                    )
                else:
                    gr.Markdown("## Shodan API key not configured")
                    gr.Markdown(
                        "To use the Shodan scan feature, you need to set the `SHODAN_API_KEY` "
                        "environment variable in your Hugging Face Space settings."
                    )

    return interface
411
+
412
+
413
def main():
    """Build the Gradio interface and start serving it."""
    create_interface().launch()


# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    main()