latterworks committed
Commit 2d96a84 · verified · 1 Parent(s): 44d06c0

Update app.py

Files changed (1):
  1. app.py +430 -462
app.py CHANGED
@@ -1,564 +1,532 @@
--- app.py (before)
  import os
  import logging
- import requests
  import bcrypt
- import gradio as gr
- from huggingface_hub import HfApi, login
- from datasets import load_dataset, Dataset, Features, Value, Sequence
- from typing import Dict, List, Optional, Any
- import time
- from concurrent.futures import ThreadPoolExecutor, as_completed
  import shodan
- import html

- # Configure logging
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  )
  logger = logging.getLogger(__name__)

- # Function to access secrets
- def get_secret(name: str) -> str:
-     """Get a secret from Hugging Face Space secrets."""
-     try:
-         from huggingface_hub import secrets
-         return secrets.get_secret(name)
-     except Exception as e:
-         logger.warning(f"Error accessing secret {name}: {e}")
-         return ""
-
- # Sanitize inputs
- def sanitize_input(text: str) -> str:
-     """Sanitize user input to prevent injection attacks."""
-     if not text:
-         return ""
-     return html.escape(text)

  # Dataset functions
- def get_or_create_dataset(repo_id: str, token: Optional[str] = None) -> Dataset:
      """
-     Get or create a dataset with the specified schema.

      Args:
-         repo_id: The Hugging Face dataset repository ID
-         token: Authentication token for private datasets

      Returns:
-         The dataset object
      """
      try:
          # Try to load the dataset
-         dataset = load_dataset(repo_id, token=token)
-         return dataset["train"]
-     except Exception as e:
-         logger.info(f"Dataset {repo_id} not found or error loading it: {e}")
-
-         # Create a new dataset with the required schema
-         features = Features({
-             "ip": Value("string"),
-             "port": Value("int32"),
-             "country": Value("string", default_value=""),
-             "region": Value("string", default_value=""),
-             "org": Value("string", default_value=""),
-             "models": Sequence({
-                 "name": Value("string"),
-                 "family": Value("string", default_value=""),
-                 "parameter_size": Value("string", default_value=""),
-                 "quantization_level": Value("string", default_value=""),
-                 "digest": Value("string", default_value=""),
-                 "modified_at": Value("string", default_value=""),
-                 "size": Value("int64", default_value=0)
-             }, default_value=[])
-         })
-
-         # Create empty dataset
-         empty_dataset = Dataset.from_dict({
-             "ip": [],
-             "port": [],
-             "country": [],
-             "region": [],
-             "org": [],
-             "models": []
-         }, features=features)

-         # Push to hub
-         empty_dataset.push_to_hub(repo_id, token=token)
-
-         return empty_dataset

- def update_dataset_entry(dataset: Dataset, entry: Dict[str, Any]) -> Dataset:
      """
-     Add or update an entry in the dataset.

      Args:
          dataset: The dataset to update
-         entry: The entry to add or update

      Returns:
-         The updated dataset
      """
-     # Check if entry already exists by IP and port
-     ip = entry["ip"]
-     port = entry["port"]
-
-     # Find the index if it exists
-     existing_idx = None
-     for idx, item in enumerate(dataset):
-         if item["ip"] == ip and item["port"] == port:
-             existing_idx = idx
-             break
-
-     if existing_idx is not None:
-         # Update existing entry
-         dataset = dataset.select(list(range(len(dataset))))
-         new_examples = list(dataset)
-         new_examples[existing_idx] = entry
-         return Dataset.from_dict({k: [ex[k] for ex in new_examples] for k in dataset.column_names})
-     else:
-         # Add new entry
-         new_dataset = dataset.add_item(entry)
-         return new_dataset
-
- def push_dataset_to_hub(dataset: Dataset, repo_id: str, token: Optional[str] = None):
-     """
-     Push dataset to the Hugging Face Hub.

-     Args:
-         dataset: The dataset to push
-         repo_id: The repository ID
-         token: Authentication token
-     """
-     try:
-         dataset.push_to_hub(repo_id, token=token)
-         logger.info(f"Successfully pushed dataset to {repo_id}")
-     except Exception as e:
-         logger.error(f"Error pushing dataset to hub: {e}")
-
- # Shodan functions
- def scan_with_shodan(shodan_api_key: str, query: str, max_results: int = 1000) -> List[Dict[str, Any]]:
-     """
-     Scan with Shodan API for Ollama instances.

-     Args:
-         shodan_api_key: Shodan API key
-         query: Shodan search query
-         max_results: Maximum number of results to return

-     Returns:
-         List of discovered instances
-     """
-     if not shodan_api_key:
-         logger.error("No Shodan API key provided")
-         return []
-
-     try:
-         api = shodan.Shodan(shodan_api_key)
-         results = []
-
-         # Get the number of total results
-         count_result = api.count(query)
-         total_results = count_result['total']
-         logger.info(f"Found {total_results} results for query: {query}")
-
-         # Limit to max_results
-         pages = min(total_results, max_results) // 100
-         if min(total_results, max_results) % 100 > 0:
-             pages += 1
-
-         for page in range(1, pages + 1):
-             try:
-                 result_page = api.search(query, page=page)
-                 for match in result_page['matches']:
-                     instance = {
-                         "ip": match.get("ip_str", ""),
-                         "port": match.get("port", 11434), # Default Ollama port
-                         "country": match.get("location", {}).get("country_name", ""),
-                         "region": match.get("location", {}).get("region_name", ""),
-                         "org": match.get("org", ""),
-                         "models": [] # Will be populated later
-                     }
-                     results.append(instance)
-                 logger.info(f"Processed page {page}/{pages}")
-             except shodan.APIError as e:
-                 logger.error(f"Shodan API error on page {page}: {e}")
                  break

-         return results
-     except shodan.APIError as e:
-         logger.error(f"Shodan API error: {e}")
-         return []

  # Ollama endpoint checking
- def check_ollama_endpoint(instance: Dict[str, Any], timeout: int = 5) -> Dict[str, Any]:
      """
-     Check an Ollama endpoint and extract model information.

      Args:
-         instance: Instance information (ip, port, etc.)
-         timeout: Request timeout in seconds

      Returns:
-         Updated instance with model information
      """
-     ip = instance["ip"]
-     port = instance["port"]
      url = f"http://{ip}:{port}/api/tags"
-
-     updated_instance = instance.copy()

      try:
-         response = requests.get(url, timeout=timeout)
          response.raise_for_status()

          data = response.json()
-
          if "models" in data:
-             models_list = []
-             for model in data["models"]:
-                 # Extract model details
                  model_info = {
-                     "name": model.get("name", ""),
-                     "family": model.get("details", {}).get("family", ""),
-                     "parameter_size": model.get("details", {}).get("parameter_size", ""),
-                     "quantization_level": model.get("details", {}).get("quantization_level", ""),
-                     "digest": model.get("digest", ""),
-                     "modified_at": model.get("modified_at", ""),
-                     "size": model.get("size", 0)
                  }
-                 models_list.append(model_info)
-
-             updated_instance["models"] = models_list
-             logger.info(f"Successfully extracted {len(models_list)} models from {ip}:{port}")
-         else:
-             logger.warning(f"No models found in response from {ip}:{port}")
-             updated_instance["models"] = []
-
      except requests.exceptions.RequestException as e:
-         logger.error(f"Network error for {ip}:{port}: {e}")
-         updated_instance["models"] = []
      except ValueError as e:
          logger.error(f"Invalid JSON from {ip}:{port}: {e}")
-         updated_instance["models"] = []
      except Exception as e:
-         logger.exception(f"Unexpected error for {ip}:{port}: {e}")
-         updated_instance["models"] = []

-     return updated_instance

- # Authentication functions
- def verify_password(password: str, stored_password: str) -> bool:
      """
-     Verify if the entered password matches the stored password.

      Args:
-         password: The entered password
-         stored_password: The stored password (hashed or plaintext)

      Returns:
-         True if passwords match, False otherwise
      """
-     # Sanitize input
-     password = sanitize_input(password)

-     # Check if stored password is a bcrypt hash
-     if stored_password.startswith('$2b$'):
-         return bcrypt.checkpw(password.encode('utf-8'), stored_password.encode('utf-8'))
-     else:
-         # Direct comparison for development/testing
-         return password == stored_password

- # UI creation
- def create_ui():
      """
-     Create the Gradio UI for the application.

      Returns:
-         Gradio interface
      """
-     # Get secrets
-     admin_password = get_secret("ADMIN_PASSWORD")
-     if not admin_password:
-         admin_password = "admin" # Default for development (should be replaced in production)
-         logger.warning("Admin password not set, using default (insecure)")
-
-     shodan_api_key = get_secret("SHODAN_API_KEY")
-     if not shodan_api_key:
-         logger.warning("Shodan API key not set, scans will not work")
-
-     shodan_query = get_secret("SHODAN_QUERY")
-     if not shodan_query:
-         shodan_query = "product:Ollama port:11434"
-         logger.info(f"Using default Shodan query: {shodan_query}")
-
-     hf_token = get_secret("HF_TOKEN")
-
-     # Load dataset
-     dataset_repo_id = "latterworks/llama_checker_results"
-     dataset = get_or_create_dataset(dataset_repo_id, token=hf_token)
-
-     # Function to search and display models
-     def search_models(family, param_size, name, current_dataset):
-         # Sanitize inputs
-         name = sanitize_input(name)
-
-         results = []

-         for item in current_dataset:
-             if item["models"]:
-                 for model in item["models"]:
-                     # Apply filters
-                     if family != "All" and model["family"] != family:
-                         continue
-
-                     if param_size != "All" and model["parameter_size"] != param_size:
-                         continue
-
-                     if name and name.lower() not in model["name"].lower():
-                         continue
-
-                     # Calculate size in GB
-                     size_gb = round(model["size"] / (1024 * 1024 * 1024), 2) if model["size"] else 0
-
-                     # Add to results
-                     results.append([
-                         model["name"],
-                         model["family"],
-                         model["parameter_size"],
-                         model["quantization_level"],
-                         size_gb
-                     ])

-         return results

-     # Function to display model details
-     def show_model_details(evt: gr.SelectData, results, is_admin, current_dataset):
-         selected_row = evt.index[0]
-         model_name = results[selected_row][0]

-         # Find the model
-         for item in current_dataset:
-             if item["models"]:
-                 for model in item["models"]:
-                     if model["name"] == model_name:
-                         details = {
-                             "name": model["name"],
-                             "family": model["family"],
-                             "parameter_size": model["parameter_size"],
-                             "quantization_level": model["quantization_level"],
-                             "digest": model["digest"],
-                             "modified_at": model["modified_at"],
-                             "size_bytes": model["size"],
-                             "size_gb": round(model["size"] / (1024 * 1024 * 1024), 2) if model["size"] else 0
-                         }
-
-                         # Include IP and port for admin users
-                         if is_admin:
-                             details["ip"] = item["ip"]
-                             details["port"] = item["port"]
-                             details["country"] = item["country"]
-                             details["region"] = item["region"]
-                             details["org"] = item["org"]
-
-                         return details

-         return {"error": "Model not found"}

-     # Function to update admin visibility
-     def update_admin_visibility(is_admin):
-         return (
-             gr.update(visible=not is_admin), # admin_required
-             gr.update(visible=is_admin) # scan_group
-         )

-     # Function to perform scan
-     def perform_scan(max_results, is_admin, current_dataset):
-         if not is_admin:
-             return "⚠️ Admin login required", [], current_dataset

-         # Start scan
-         yield "🔍 Starting Shodan scan...", [], current_dataset

-         try:
-             # Get instances from Shodan
-             instances = scan_with_shodan(shodan_api_key, shodan_query, max_results)
-             yield f"🔍 Found {len(instances)} instances. Checking endpoints...", [], current_dataset
-
-             # Check endpoints using executor
-             updated_instances = []
-             with ThreadPoolExecutor(max_workers=10) as executor:
-                 # Create future tasks
-                 future_to_instance = {
-                     executor.submit(check_ollama_endpoint, instance): instance
-                     for instance in instances
-                 }

-                 # Process completed tasks
-                 for future in as_completed(future_to_instance):
-                     try:
-                         updated_instance = future.result()
-                         updated_instances.append(updated_instance)
-                     except Exception as e:
-                         instance = future_to_instance[future]
-                         logger.exception(f"Error processing {instance['ip']}:{instance['port']}: {e}")
-                         # In case of error, append the original instance without model info
-                         instance["models"] = []
-                         updated_instances.append(instance)
-
-             # Update dataset
-             updated_dataset = current_dataset
-             for instance in updated_instances:
-                 updated_dataset = update_dataset_entry(updated_dataset, instance)
-
-             # Push to hub
-             push_dataset_to_hub(updated_dataset, dataset_repo_id, token=hf_token)
-
-             # Prepare results for display
-             results = []
-             total_models = 0
-             for instance in updated_instances:
-                 models_count = len(instance["models"]) if instance["models"] else 0
-                 total_models += models_count
-                 results.append([
-                     instance["ip"],
-                     instance["port"],
-                     instance["country"],
-                     instance["region"],
-                     instance["org"],
-                     models_count
-                 ])
-
-             yield f"✅ Scan completed! Found {len(instances)} instances with a total of {total_models} models.", results, updated_dataset
-
-         except Exception as e:
-             logger.exception(f"Error during scan: {e}")
-             yield f"❌ Error during scan: {str(e)}", [], current_dataset

-     # Create Gradio UI
-     with gr.Blocks(title="Ollama Instance Scanner") as app:
-         # State variables
-         current_dataset = gr.State(dataset)
-         is_admin = gr.State(False)

-         with gr.Tab("Browse Models"):
-             # Filters
-             with gr.Row():
-                 with gr.Column():
-                     # Extract unique values for family and parameter_size
-                     families = set()
-                     parameter_sizes = set()
-                     for item in dataset:
-                         if item["models"]:
-                             for model in item["models"]:
-                                 if model["family"]:
-                                     families.add(model["family"])
-                                 if model["parameter_size"]:
-                                     parameter_sizes.add(model["parameter_size"])
-
-                     # Create dropdowns
-                     family_dropdown = gr.Dropdown(
-                         choices=["All"] + sorted(list(families)),
-                         value="All",
-                         label="Model Family"
-                     )
-                     param_size_dropdown = gr.Dropdown(
-                         choices=["All"] + sorted(list(parameter_sizes)),
-                         value="All",
-                         label="Parameter Size"
-                     )
-                     name_search = gr.Textbox(
-                         value="",
-                         label="Model Name Contains"
-                     )
-
-             # Search button
-             search_btn = gr.Button("Search")
-
-             # Results dataframe
-             results_df = gr.DataFrame(
-                 value=[],
-                 headers=["Name", "Family", "Parameter Size", "Quantization", "Size (GB)"],
-                 label="Model Results"
-             )
-
-             # Detailed info JSON viewer
-             model_details = gr.JSON(label="Model Details")
-
-             # Connect events
-             search_btn.click(
-                 search_models,
-                 inputs=[family_dropdown, param_size_dropdown, name_search, current_dataset],
-                 outputs=[results_df]
              )
-
-             results_df.select(
-                 show_model_details,
-                 inputs=[results_df, is_admin, current_dataset],
-                 outputs=[model_details]
              )

-         with gr.Tab("Admin Login"):
-             admin_pw_input = gr.Textbox(
                  value="",
-                 type="password",
-                 label="Admin Password"
              )
-             login_btn = gr.Button("Login")
-             login_status = gr.Markdown("Not logged in")

-             def admin_login(password):
-                 if verify_password(password, admin_password):
-                     return "✅ Successfully logged in as admin", True
-                 else:
-                     return "❌ Invalid password", False

-             login_btn.click(
-                 admin_login,
-                 inputs=[admin_pw_input],
-                 outputs=[login_status, is_admin]
              )

-         with gr.Tab("Shodan Scan") as shodan_tab:
-             # This tab is initially hidden and only shown to admins
-             admin_required = gr.Markdown("⚠️ Admin login required to access this feature")

-             with gr.Group(visible=False) as scan_group:
-                 max_results = gr.Slider(
-                     minimum=10,
-                     maximum=1000,
-                     value=100,
-                     step=10,
-                     label="Max Results"
                  )
-                 scan_btn = gr.Button("Start Scan")
-                 scan_status = gr.Markdown("Ready to scan")

-                 # Admin results dataframe with IP and port
-                 admin_results_df = gr.DataFrame(
-                     value=[],
-                     headers=["IP", "Port", "Country", "Region", "Organization", "Models Count"],
-                     label="Scan Results"
-                 )

-             # Connect events
-             is_admin.change(
-                 update_admin_visibility,
-                 inputs=[is_admin],
-                 outputs=[admin_required, scan_group]
              )

-             scan_btn.click(
-                 perform_scan,
-                 inputs=[max_results, is_admin, current_dataset],
-                 outputs=[scan_status, admin_results_df, current_dataset]
              )

      return app

- # Main function
- def main():
-     app = create_ui()
-     app.launch()
-
  if __name__ == "__main__":
-     main()

+++ app.py (after)
  import os
  import logging
+ import asyncio
  import bcrypt
+ import requests
  import shodan
+ import gradio as gr
+ from typing import List, Dict, Any, Tuple, Optional
+ from datasets import load_dataset, Dataset
+ from huggingface_hub import HfApi, create_repo

+ # Setup logging
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  )
  logger = logging.getLogger(__name__)

+ # Environment variable validation
+ required_env_vars = ["SHODAN_API_KEY", "ADMIN_PASSWORD", "SHODAN_QUERY"]
+ for var in required_env_vars:
+     if not os.environ.get(var):
+         logger.warning(f"Environment variable {var} is not set")

  # Dataset functions
+ def get_or_create_dataset(dataset_name: str = "latterworks/llama_checker_results") -> Optional[Dataset]:
      """
+     Load the dataset or create it if it doesn't exist.

      Args:
+         dataset_name: The name of the dataset on Hugging Face Hub

      Returns:
+         The dataset or None if there was an error
      """
      try:
          # Try to load the dataset
+         token = os.environ.get("HF_TOKEN")

+         try:
+             dataset_dict = load_dataset(dataset_name, token=token)
+             if "train" in dataset_dict:
+                 return dataset_dict["train"]
+             else:
+                 # If no "train" split, try to use the first available split
+                 first_split = next(iter(dataset_dict))
+                 return dataset_dict[first_split]
+         except Exception as e:
+             logger.error(f"Failed to load dataset {dataset_name}: {e}")
+
+         # Create the dataset
+         try:
+             # Create the repository
+             hf_api = HfApi(token=token)
+             create_repo(repo_id=dataset_name, repo_type="dataset", token=token)
+
+             # Create empty dataset with the correct schema
+             empty_dataset = Dataset.from_dict({
+                 "ip": [],
+                 "port": [],
+                 "country": [],
+                 "region": [],
+                 "org": [],
+                 "models": []
+             })
+
+             # Push to Hub
+             empty_dataset.push_to_hub(dataset_name, token=token)
+
+             return empty_dataset
+         except Exception as create_e:
+             logger.error(f"Failed to create dataset: {create_e}")
+             return None
+     except Exception as e:
+         logger.exception(f"Unexpected error in get_or_create_dataset: {e}")
+         return None

+ def update_dataset(dataset: Dataset, new_entries: List[Dict[str, Any]]) -> Optional[Dataset]:
      """
+     Update the dataset with new entries from Shodan scan.

      Args:
          dataset: The dataset to update
+         new_entries: List of new entries to add or update in the dataset

      Returns:
+         The updated dataset or None if there was an error
      """
+     if dataset is None:
+         logger.error("Cannot update None dataset")
+         return None

+     # Convert dataset to dictionaries for easier manipulation
+     dataset_dict = dataset.to_dict()

+     # Add new entries
+     for entry in new_entries:
+         ip = entry.get("ip")
+         port = entry.get("port")

+         # Check if this IP:port combination already exists
+         found = False
+         for i, (existing_ip, existing_port) in enumerate(zip(dataset_dict["ip"], dataset_dict["port"])):
+             if existing_ip == ip and existing_port == port:
+                 # Update the entry
+                 dataset_dict["country"][i] = entry.get("country", dataset_dict["country"][i])
+                 dataset_dict["region"][i] = entry.get("region", dataset_dict["region"][i])
+                 dataset_dict["org"][i] = entry.get("org", dataset_dict["org"][i])
+                 dataset_dict["models"][i] = entry.get("models", dataset_dict["models"][i])
+                 found = True
                  break

+         if not found:
+             # Add as a new entry
+             dataset_dict["ip"].append(entry.get("ip", ""))
+             dataset_dict["port"].append(entry.get("port", 0))
+             dataset_dict["country"].append(entry.get("country", ""))
+             dataset_dict["region"].append(entry.get("region", ""))
+             dataset_dict["org"].append(entry.get("org", ""))
+             dataset_dict["models"].append(entry.get("models", []))
+
+     # Convert back to Dataset
+     updated_dataset = Dataset.from_dict(dataset_dict)
+
+     # Push to Hub
+     token = os.environ.get("HF_TOKEN")
+     updated_dataset.push_to_hub("latterworks/llama_checker_results", token=token)
+
+     return updated_dataset

  # Ollama endpoint checking
+ async def check_ollama_endpoint(ip: str, port: int) -> Dict[str, Any]:
      """
+     Check a single Ollama endpoint and retrieve model information.

      Args:
+         ip: The IP address of the Ollama instance
+         port: The port of the Ollama instance

      Returns:
+         A dictionary with IP, port, models, and status information
      """
      url = f"http://{ip}:{port}/api/tags"
+     models = []
+     status = "success"

      try:
+         response = requests.get(url, timeout=5)
          response.raise_for_status()

          data = response.json()
          if "models" in data:
+             for model_data in data["models"]:
+                 details = model_data.get("details", {})
                  model_info = {
+                     "name": model_data.get("name", ""),
+                     "family": details.get("family", ""),
+                     "parameter_size": details.get("parameter_size", ""),
+                     "quantization_level": details.get("quantization_level", ""),
+                     "digest": model_data.get("digest", ""),
+                     "modified_at": model_data.get("modified_at", ""),
+                     "size": model_data.get("size", 0)
                  }
+                 models.append(model_info)
      except requests.exceptions.RequestException as e:
+         logger.error(f"Network error when checking {ip}:{port}: {e}")
+         status = "connection failed"
      except ValueError as e:
          logger.error(f"Invalid JSON from {ip}:{port}: {e}")
+         status = "invalid json"
      except Exception as e:
+         logger.exception(f"Unexpected error when checking {ip}:{port}")
+         status = "unexpected error"

+     return {
+         "ip": ip,
+         "port": port,
+         "models": models,
+         "status": status
+     }

+ async def check_ollama_endpoints(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
      """
+     Check multiple Ollama endpoints concurrently.

      Args:
+         entries: List of entries containing IP and port information

      Returns:
+         List of updated entries with model information
      """
+     tasks = []
+     for entry in entries:
+         task = asyncio.create_task(check_ollama_endpoint(entry["ip"], entry["port"]))
+         tasks.append((entry, task))

+     results = []
+     for entry, task in tasks:
+         try:
+             result = await task
+             # Merge the result with the original entry
+             # This preserves fields like country, region, and org
+             updated_entry = entry.copy()
+             updated_entry["models"] = result["models"]
+             updated_entry["status"] = result["status"]
+             results.append(updated_entry)
+         except Exception as e:
+             logger.error(f"Error checking endpoint {entry.get('ip')}:{entry.get('port')}: {e}")
+             entry["models"] = []
+             entry["status"] = "error"
+             results.append(entry)
+
+     return results

+ # Shodan scanning
+ def run_shodan_scan() -> List[Dict[str, Any]]:
      """
+     Run a Shodan scan for Ollama instances.

      Returns:
+         List of entries containing IP, port, and location information
      """
+     api_key = os.environ.get("SHODAN_API_KEY")
+     if not api_key:
+         logger.error("SHODAN_API_KEY environment variable not set")
+         return []
+
+     query = os.environ.get("SHODAN_QUERY", "product:Ollama port:11434")
+
+     try:
+         api = shodan.Shodan(api_key)
+         results = api.search(query, limit=1000)

+         entries = []
+         for result in results["matches"]:
+             entry = {
+                 "ip": result.get("ip_str", ""),
+                 "port": result.get("port", 0),
+                 "country": result.get("location", {}).get("country_name", ""),
+                 "region": result.get("location", {}).get("region_name", ""),
+                 "org": result.get("org", ""),
+                 "models": []
+             }
+             entries.append(entry)

+         return entries
+     except shodan.APIError as e:
+         logger.error(f"Shodan API error: {e}")
+         return []
+     except Exception as e:
+         logger.exception(f"Unexpected error in run_shodan_scan")
+         return []
+
+ # Password validation
+ def validate_admin_password(password: str) -> bool:
+     """
+     Validate the admin password.

+     Args:
+         password: The entered password to validate

+     Returns:
+         True if the password is valid, False otherwise
+     """
+     stored_password = os.environ.get("ADMIN_PASSWORD")
+     if not stored_password:
+         logger.error("ADMIN_PASSWORD environment variable not set")
+         return False
+
+     # If the stored password starts with '$2b', it's a bcrypt hash
+     if stored_password.startswith('$2b'):
+         return bcrypt.checkpw(password.encode('utf-8'), stored_password.encode('utf-8'))
+     else:
+         # Otherwise, do a direct comparison
+         return password == stored_password
+
+ # Gradio UI functions
+ def get_model_families_and_sizes(dataset: Dataset) -> Tuple[List[str], List[str]]:
+     """
+     Extract all unique model families and parameter sizes from the dataset.
+
+     Args:
+         dataset: The dataset to extract from

+     Returns:
+         Tuple of (families, parameter_sizes)
+     """
+     if dataset is None:
+         return [], []

+     families = set()
+     parameter_sizes = set()
+
+     for i in range(len(dataset)):
+         models = dataset[i]["models"]
+         if models:
+             for model in models:
+                 family = model.get("family")
+                 param_size = model.get("parameter_size")
+
+                 if family:
+                     families.add(family)
+                 if param_size:
+                     parameter_sizes.add(param_size)

+     return sorted(list(families)), sorted(list(parameter_sizes))
+
+ def search_models(family: str, parameter_size: str, name: str, dataset: Dataset, is_admin: bool) -> Tuple[List[Dict], Dict]:
+     """
+     Search for models in the dataset based on filters.
+
+     Args:
+         family: Filter by model family
+         parameter_size: Filter by parameter size
+         name: Filter by model name
+         dataset: The dataset to search in
+         is_admin: Whether the user is an admin

+     Returns:
+         Tuple of (filtered_models, empty_details)
+     """
+     if dataset is None:
+         return [], {}
+
+     # Collect all models from the dataset
+     all_models = []
+     for i in range(len(dataset)):
+         ip = dataset[i]["ip"]
+         port = dataset[i]["port"]
+         models = dataset[i]["models"]

+         if models:
+             for model in models:
+                 model_copy = model.copy()

+                 # Add source info if admin
+                 if is_admin:
+                     model_copy["source_ip"] = ip
+                     model_copy["source_port"] = port
+
+                 # Calculate size in GB
+                 if "size" in model:
+                     model_copy["size_gb"] = round(model["size"] / (1024**3), 2)
+                 else:
+                     model_copy["size_gb"] = 0
+
+                 all_models.append(model_copy)

+     # Apply filters
+     filtered_models = all_models
+     if family:
+         filtered_models = [m for m in filtered_models if m.get("family") == family]
+     if parameter_size:
+         filtered_models = [m for m in filtered_models if m.get("parameter_size") == parameter_size]
+     if name:
+         filtered_models = [m for m in filtered_models if name.lower() in m.get("name", "").lower()]
+
+     return filtered_models, {}
+
+ def select_model(evt: gr.SelectData, models: List[Dict]) -> Dict:
+     """
+     Handle model selection from the table.
+
+     Args:
+         evt: The selection event
+         models: The list of models

+     Returns:
+         The selected model details
+     """
+     if not models or evt.index >= len(models):
+         return {}
+
+     return models[evt.index]
+
+ async def scan_worker() -> str:
+     """
+     Run the complete scan workflow.
+
+     Returns:
+         Status message
+     """
+     # Run Shodan scan
+     entries = run_shodan_scan()
+     if not entries:
+         return "No Ollama instances found or scan failed"
+
+     # Check endpoints
+     updated_entries = await check_ollama_endpoints(entries)
+
+     # Update dataset
+     dataset = get_or_create_dataset()
+     if dataset is not None:
+         update_dataset(dataset, updated_entries)
+         return f"Scan completed. Found {len(entries)} Ollama instances."
+     else:
+         return "Scan completed but failed to update dataset"
+
+ # Main application
+ def create_app():
+     # Load the dataset
+     dataset = get_or_create_dataset()
+
+     # Get model families and parameter sizes
+     families, parameter_sizes = [], []
+     if dataset is not None:
+         families, parameter_sizes = get_model_families_and_sizes(dataset)
+
+     with gr.Blocks(title="Ollama Instance Explorer") as app:
+         # Admin login section
+         with gr.Row():
+             admin_password = gr.Textbox(
+                 label="Admin Password",
+                 type="password",
+                 placeholder="Enter admin password"
              )
+             login_button = gr.Button("Login")
+             login_status = gr.Textbox(
+                 label="Login Status",
+                 value="",
+                 interactive=False
              )

+         # Admin state
+         is_admin = gr.State(False)
+
+         # Admin-only section
+         with gr.Tab("Shodan Scan", visible=False) as admin_tab:
+             scan_button = gr.Button("Start Scan")
+             scan_status = gr.Textbox(
+                 label="Scan Status",
                  value="",
+                 interactive=False
              )

+             def on_scan_click():
+                 # We can't use async directly with Gradio, so use asyncio.run
+                 try:
+                     return asyncio.run(scan_worker())
+                 except Exception as e:
+                     logger.exception("Error during scan")
+                     return f"Error during scan: {str(e)}"

+             scan_button.click(
+                 on_scan_click,
+                 inputs=[],
+                 outputs=[scan_status]
              )

+         # Public section
+         with gr.Tab("Browse Models"):
+             with gr.Row():
+                 family_filter = gr.Dropdown(
+                     label="Family",
+                     choices=[""] + families,
+                     value=""
+                 )
+                 parameter_size_filter = gr.Dropdown(
+                     label="Parameter Size",
+                     choices=[""] + parameter_sizes,
+                     value=""
+                 )
+                 name_filter = gr.Textbox(
+                     label="Name Search",
+                     placeholder="Enter model name to search"
+                 )
+
+             search_button = gr.Button("Search")

+             with gr.Row():
+                 models_table = gr.DataFrame(
+                     headers=["name", "family", "parameter_size", "quantization_level", "size_gb"],
+                     datatype=["str", "str", "str", "str", "number"],
+                     interactive=False
                  )
+                 model_details = gr.JSON(label="Model Details")
+
+             def on_search(family, parameter_size, name, admin_status):
+                 models, _ = search_models(family, parameter_size, name, dataset, admin_status)

+                 # Create DataFrame-friendly format
+                 df_data = []
+                 for model in models:
+                     row = {
+                         "name": model.get("name", ""),
+                         "family": model.get("family", ""),
+                         "parameter_size": model.get("parameter_size", ""),
+                         "quantization_level": model.get("quantization_level", ""),
+                         "size_gb": model.get("size_gb", 0)
+                     }
+                     df_data.append(row)
+
+                 return df_data, {}

+             search_button.click(
+                 on_search,
+                 inputs=[family_filter, parameter_size_filter, name_filter, is_admin],
+                 outputs=[models_table, model_details]
              )

+             models_table.select(
+                 select_model,
+                 inputs=[models_table],
+                 outputs=[model_details]
              )
+
+         # Handle login
+         def on_login(password):
+             if validate_admin_password(password):
+                 return True, gr.update(visible=True), "Login successful"
+             else:
+                 return False, gr.update(visible=False), "Invalid password"
+
+         login_button.click(
+             on_login,
+             inputs=[admin_password],
+             outputs=[is_admin, admin_tab, login_status]
+         )
+
+         # Initial search on load
+         app.load(
+             lambda: on_search("", "", "", False),
+             inputs=None,
+             outputs=[models_table, model_details]
+         )

      return app

+ # Run the app
  if __name__ == "__main__":
+     app = create_app()
+     app.launch()
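
A note on the new validate_admin_password: it accepts either a plaintext value or a bcrypt hash (recognized by the '$2b' prefix) in the ADMIN_PASSWORD secret. A minimal sketch for generating such a hash locally, assuming the same bcrypt package the app imports:

```python
import bcrypt

# Generate a bcrypt hash suitable for the ADMIN_PASSWORD secret.
# gensalt() embeds a random salt in the hash; checkpw() later recovers
# it from the stored value, which is why validate_admin_password works.
hashed = bcrypt.hashpw("my-admin-password".encode("utf-8"), bcrypt.gensalt())
print(hashed.decode("utf-8"))  # prints a string starting with $2b$12$...
```

One caveat: bcrypt hashes produced by other tools may start with $2a$ or $2y$ instead of $2b$, in which case the startswith('$2b') check would fall through to a plaintext comparison.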
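A behavioral caveat in the new endpoint checker: check_ollama_endpoint is declared async but calls the blocking requests.get, so check_ollama_endpoints effectively probes hosts one at a time even though it creates tasks. A hedged sketch of one way to regain real concurrency on Python 3.9+, keeping requests and the same return shape; the to_thread wrapper and the simplified model handling are assumptions, not part of this commit:

```python
import asyncio
import requests
from typing import Any, Dict

async def check_ollama_endpoint_threaded(ip: str, port: int) -> Dict[str, Any]:
    # Run the blocking HTTP call in a worker thread so the event loop can
    # overlap many probes instead of serializing them on one thread.
    url = f"http://{ip}:{port}/api/tags"
    try:
        response = await asyncio.to_thread(requests.get, url, timeout=5)
        response.raise_for_status()
        # Raw model dicts; the per-field normalization from the commit is
        # omitted here for brevity.
        models = response.json().get("models", [])
        return {"ip": ip, "port": port, "models": models, "status": "success"}
    except Exception:
        return {"ip": ip, "port": port, "models": [], "status": "connection failed"}
```

With this variant, check_ollama_endpoints could await asyncio.gather(...) over the tasks and the probes would genuinely run in parallel.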
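The select_model handler may also need attention: when a gr.DataFrame fires a select event, evt.index is typically a [row, column] pair rather than a scalar, and inputs=[models_table] delivers the table's current value (often a pandas DataFrame), not the original list of dicts. A hedged sketch of a row-based variant; the exact SelectData semantics vary by Gradio version, so treat this as an assumption to verify:

```python
import gradio as gr

def select_model_row(evt: gr.SelectData, table_data) -> dict:
    # For DataFrame selections, evt.index is usually [row, col]; take the row.
    row = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
    try:
        # table_data is whatever the DataFrame currently holds; return the
        # selected row as a plain dict so gr.JSON can render it.
        return dict(table_data.iloc[row])
    except Exception:
        return {}
```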
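Finally, api.search(query, limit=1000) caps each scan at the first 1000 matches of a single call, where the old code paged through results manually. The shodan library also exposes a cursor that pages transparently; a minimal sketch, with max_results as an illustrative parameter rather than anything from the commit:

```python
import os
import shodan

def iter_shodan_matches(query: str, max_results: int = 5000):
    # search_cursor() yields banner dicts one by one and fetches further
    # pages on demand, so large result sets stream lazily.
    api = shodan.Shodan(os.environ["SHODAN_API_KEY"])
    for i, banner in enumerate(api.search_cursor(query)):
        if i >= max_results:
            break
        yield banner
```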