Spaces:
Update app.py
app.py
CHANGED
@@ -1,72 +1,157 @@
 import os
-import
-import
 import logging
-import
 import shodan
-import asyncio
 import aiohttp
-import
-import
-from
-from

-# Configure logging
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler("ollama_scanner.log")
-    ]
 )
 logger = logging.getLogger(__name__)

-def load_or_create_dataset():
     """
-
     Returns:
-
     Raises:
-        ValueError:
-        ConnectionError: When network issues prevent dataset access
     """
-
     try:
-        #
-
-        dataset = datasets.load_dataset(dataset_id, token=hf_token)
-    except TypeError:
-        # Fallback: Use legacy authentication parameter
-        logger.info("Attempting legacy authentication method")
-        dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)

-
-        else:
-            # Handle direct Dataset object (no splits)
-            return dataset
-
     except FileNotFoundError:
-
             "ip": [],
             "port": [],
             "country": [],
@@ -74,728 +159,450 @@ def load_or_create_dataset():
             "org": [],
             "models": []
         })

-
-        dataset = datasets.load_dataset(dataset_id, token=hf_token)
-    except TypeError:
-        dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
-
-        # Extract appropriate split
-        if isinstance(dataset, datasets.DatasetDict):
-            if "train" in dataset:
-                return dataset["train"]
-            first_split = next(iter(dataset))
-            logger.info(f"Using '{first_split}' split from newly created dataset")
-            return dataset[first_split]
-        else:
-            return dataset
-
-    except Exception as creation_error:
-        logger.error(f"Dataset creation failed: {creation_error}")
-        raise ValueError(f"Failed to create dataset: {creation_error}") from creation_error
-
-    except (ConnectionError, TimeoutError) as network_error:
-        logger.error(f"Network error accessing dataset: {network_error}")
-        raise ConnectionError(f"Network failure accessing HuggingFace Hub: {network_error}") from network_error
-
-    except Exception as general_error:
-        logger.error(f"Unexpected error accessing dataset: {general_error}")
-        raise ValueError(f"Dataset access failed: {general_error}") from general_error

-def
     """
-
     Args:
-
     Returns:
-        List of
     """
-    # API key fetch - no validation needed as it's centralized at startup
-    api_key = os.getenv("SHODAN_API_KEY")
-    shodan_query = os.getenv("SHODAN_QUERY", "product:Ollama port:11434")
-
-    api = shodan.Shodan(api_key)
-
-    try:
-        logger.info(f"Executing Shodan search_cursor with query: {shodan_query}")
-
-        # Use search_cursor to handle pagination automatically
-        cursor = api.search_cursor(shodan_query)
-
-        # Initialize scan metrics
-        instances = []
-        processed = 0
-        batch_size = 100  # Process results in batches for progress updates
-
-        progress(0, desc="Initializing Shodan data retrieval")
-
-        # Process all results from the cursor
-        results_batch = []
-        for result in cursor:
-            results_batch.append(result)
-            processed += 1
-
-            # Process in batches for efficiency
-            if len(results_batch) >= batch_size:
-                progress(min(1.0, processed / (processed + 100)), desc=f"Retrieved {processed} Ollama instances")
-
-                # Extract instance data from batch
-                for result in results_batch:
-                    instances.append({
-                        'ip': result.get('ip_str'),
-                        'port': result.get('port', 11434),
-                        'country': result.get('location', {}).get('country_name'),
-                        'region': result.get('location', {}).get('region_name'),
-                        'org': result.get('org'),
-                        'models': []
-                    })
-                results_batch = []
-
-        # Process any remaining results
-        if results_batch:
-            for result in results_batch:
-                instances.append({
-                    'ip': result.get('ip_str'),
-                    'port': result.get('port', 11434),
-                    'country': result.get('location', {}).get('country_name'),
-                    'region': result.get('location', {}).get('region_name'),
-                    'org': result.get('org'),
-                    'models': []
-                })
-
-        logger.info(f"Completed Shodan scan, retrieved {len(instances)} Ollama instances")
-        return instances
-
-    except shodan.APIError as e:
-        error_msg = str(e)
-        if "Invalid API key" in error_msg:
-            logger.error("Shodan authentication failed: Invalid API key")
-            raise ValueError("Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable.")
-        elif "Request rate limit reached" in error_msg:
-            logger.error(f"Shodan rate limit exceeded: {e}")
-            raise ValueError("Shodan API rate limit exceeded. Please wait before trying again.")
-        else:
-            logger.error(f"Shodan API error: {e}")
-            raise
-    except Exception as e:
-        logger.error(f"Unhandled exception during Shodan scan: {e}")
-        raise
-
-async def check_single_endpoint(session, instance):
-    """Check a single Ollama endpoint for available models."""
-    ip = instance['ip']
-    port = instance['port']
     url = f"http://{ip}:{port}/api/tags"

     try:
-
     except asyncio.TimeoutError:
-        logger.warning(f"
-        return instance
     except Exception as e:
-        logger.
-

-
     """
-
     Args:
-
-        progress: Gradio progress bar

     Returns:
-
     """
-    if
-
-
-    progress(0, desc=f"Preparing to check {total_instances} Ollama endpoints")

-
-    conn = aiohttp.TCPConnector(limit=50, ttl_dns_cache=300)
-    timeout = aiohttp.ClientTimeout(total=30, connect=5, sock_connect=5, sock_read=20)

-
-    completed = 0
-
-    for future in asyncio.as_completed(tasks):
-        try:
-            # Process completed task
-            instance = await future
-            updated_instances.append(instance)
-
-            # Update progress with meaningful metrics
-            completed += 1
-            progress_pct = completed / total_instances
-            progress(progress_pct, desc=f"Checked {completed}/{total_instances} endpoints ({progress_pct:.1%})")
-
-            # Log models found
-            if instance.get('models'):
-                logger.info(f"Found {len(instance['models'])} models at {instance['ip']}:{instance['port']}")
-
-        except Exception as task_error:
-            # Handle per-task errors without stopping the process
-            logger.warning(f"Endpoint check failed: {task_error}")
-            # Continue processing remaining endpoints
-
-    valid_instances = [i for i in updated_instances if i.get('models')]
-    logger.info(f"Endpoint validation complete: {len(valid_instances)}/{total_instances} accessible")
-    return updated_instances

-
     """
-
-
-    Implements single-pass dataset updates with:
-    1. Optimized in-memory index of existing entries
-    2. Differential detection of new vs. modified instances
-    3. Single hub push with consolidated changes

     Args:
-
-        instances: List of Ollama instances with model information

     Returns:
-
     """
-
-    start_time = time.time()
-
-    # Optimization: Create indexed lookup of existing instances for O(1) access
-    dataset_dict = {}
-    for idx, item in enumerate(dataset):
-        key = f"{item['ip']}:{item['port']}"
-        dataset_dict[key] = {
-            'idx': idx,
-            'data': item
-        }
-
-    # Track modification metrics
-    stats = {
-        'new': 0,
-        'updated': 0,
-        'unchanged': 0,
-        'models_added': 0
-    }
-
-    # Process differentials
-    update_candidates = []
-    new_instances = []
-
-    for instance in instances:
-        # Skip instances without valid IP
-        if not instance.get('ip'):
-            continue
-
-        instance_key = f"{instance['ip']}:{instance['port']}"

-
-            existing = dataset_dict[instance_key]['data']
-            needs_update = False
-
-            # Check metadata changes
-            for field in ['country', 'region', 'org']:
-                if instance.get(field) and instance.get(field) != existing.get(field):
-                    needs_update = True
-
-            # Check model changes - only update if models were found
-            if instance.get('models'):
-                # Compare model signatures to detect changes
-                existing_models = {model.get('name', ''): model for model in existing.get('models', [])}
-                new_models = {model.get('name', ''): model for model in instance.get('models', [])}
-
-                if set(new_models.keys()) != set(existing_models.keys()):
-                    needs_update = True
-                    stats['models_added'] += len(set(new_models.keys()) - set(existing_models.keys()))
-
-            if needs_update:
-                # Create updated instance
-                updated = dict(existing)
-                updated.update({
-                    'country': instance.get('country', existing.get('country')),
-                    'region': instance.get('region', existing.get('region')),
-                    'org': instance.get('org', existing.get('org')),
-                })
-
-                # Only update models if they were found
-                if instance.get('models'):
-                    updated['models'] = instance['models']
-
-                update_candidates.append(updated)
-                stats['updated'] += 1
-            else:
-                stats['unchanged'] += 1
-        else:
-            # New instance
-            new_instances.append(instance)
-            stats['new'] += 1
-
-    # Efficiently construct updated dataset
-    if new_instances or update_candidates:
-        # Start with current dataset
-        current_data = dataset.to_list()

-        #
-
-            idx = dataset_dict[instance_key]['idx']
-            current_data[idx] = updated

-
-        #
-
-        updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)

-
-        logger.info(f"Dataset synchronization complete in {execution_time:.2f}s: {stats['new']} new, {stats['updated']} updated, {stats['unchanged']} unchanged, {stats['models_added']} new models")

-

-def get_unique_values(dataset):
     """
-    Get unique values for

     Args:
-        dataset:

     Returns:
-
     """
-
-    families = set()
-    parameter_sizes = set()
-
-    # Extract unique values from models
-    for instance in dataset:
-        for model in instance.get('models', []):
-            details = model.get('details', {})
-
-            # Handle both direct details in the model and nested details
-            if isinstance(details, dict):
-                family = details.get('family')
-                parameter_size = details.get('parameter_size')
-            else:
-                family = model.get('family')
-                parameter_size = model.get('parameter_size')
-
-            if family:
-                families.add(family)
-
-            if parameter_size:
-                parameter_sizes.add(parameter_size)

-

-def search_models(dataset, family=
     """
     Search for models in the dataset based on filters.

     Args:
-        dataset:
-
-        is_admin: Whether to include IP and port information

     Returns:
-        List
     """
-

-                model_size_bytes = model.get('size', 0)
-                model_size_gb = model_size_bytes / (1024 * 1024 * 1024) if model_size_bytes else 0

             # Apply filters
-            if
                 continue
-
-            if parameter_size and model_param_size != parameter_size:
                 continue
-
-            if name_search and name_search.lower() not in model_name.lower():
                 continue

-            #
-
-                'country': country,
-                'region': region,
-                'org': org,
             }

-            #
-

-            #
             if is_admin:
-
-

-    return

-def create_interface():
     """
-    Create

     Returns:
-        gr.Blocks:
     """
-    #
-    def validate_admin():
-        """Check if current user has admin privileges based on API key"""
-        # For production systems, this would use proper authentication
-        # Currently using API key presence as simple auth mechanism
-        admin_key = os.getenv("ADMIN_KEY", "")
-        shodan_key = os.getenv("SHODAN_API_KEY", "")
-        return bool(admin_key and shodan_key)
-
     try:
-        # Initialize critical data structures once at startup
-        logger.info("Initializing application data layer")
         dataset = load_or_create_dataset()

-
-        is_admin = validate_admin()
-        admin_status = "enabled" if is_admin else "disabled"
-        logger.info(f"Administrative access: {admin_status}")
-
-        # Create interface with optimized structure
-        with gr.Blocks(
-            title="Ollama Instance Scanner",
-            theme=gr.themes.Soft(),
-            css=".footer {text-align: center; margin-top: 20px; color: #666;}"
-        ) as interface:
-            # Header section
-            with gr.Row():
-                with gr.Column():
-                    gr.Markdown("# Ollama Instance Scanner")
-                    gr.Markdown("Browse publicly accessible Ollama models and their capabilities")
-
-            # Tab container
-            with gr.Tabs() as tabs:
-                # Tab 1: Model Browser (Public)
-                with gr.TabItem("Browse Models"):
-                    with gr.Row():
-                        # Filter controls
-                        with gr.Column(scale=1):
-                            with gr.Box():
-                                gr.Markdown("### Search Filters")
-                                family_dropdown = gr.Dropdown(
-                                    choices=["All"] + unique_values['families'],
-                                    value="All",
-                                    label="Model Family",
-                                    interactive=True
-                                )
-                                parameter_size_dropdown = gr.Dropdown(
-                                    choices=["All"] + unique_values['parameter_sizes'],
-                                    value="All",
-                                    label="Parameter Size",
-                                    interactive=True
-                                )
-                                name_search = gr.Textbox(
-                                    label="Model Name",
-                                    placeholder="Enter model name...",
-                                    interactive=True
-                                )
-                                search_button = gr.Button("Search Models", variant="primary")
-
-                    # Results section
-                    with gr.Row():
-                        # Model results table
-                        results_table = gr.DataFrame(
-                            value=initial_results,
-                            headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
-                            label="Available Models",
-                            interactive=False,
-                            wrap=True
                         )
-
-                        model_details = gr.JSON(
-                            label="Model Specifications",
-                            visible=True
                         )

-
-                    # Scanner controls
-                    with gr.Row():
-                        shodan_scan_button = gr.Button(
-                            "Start Shodan Scan",
-                            variant="primary",
-                            interactive=is_admin
-                        )
-
-                    # Status display
-                    with gr.Row():
-                        scan_status = gr.Textbox(
-                            label="Scan Status",
-                            value="Ready to scan" if is_admin else "Admin access required",
-                            interactive=False
-                        )
-
-            # Footer
-            gr.Markdown(
-                "### Ollama Instance Scanner | Powered by Shodan & Hugging Face",
-                elem_classes=["footer"]
-            )
-
-            # Define optimized event handlers
-            def on_search_click(family, parameter_size, name_search):
-                """Process model search with optimized filtering"""
-                try:
-                    # Apply filters
-                    family_filter = None if family == "All" else family
-                    param_size_filter = None if parameter_size == "All" else parameter_size
-                    name_filter = None if not name_search else name_search.strip()
-
-                    # Execute search with admin privileges if available
-                    results = search_models(
-                        dataset,
-                        family_filter,
-                        param_size_filter,
-                        name_filter,
-                        is_admin
                     )
-
-                    logger.info(f"Search completed: {len(results)} models found matching criteria")
-                    return results
-                except Exception as search_error:
-                    logger.error(f"Search failed: {search_error}")
-                    # Return empty results on error
-                    return []
-
-            def on_table_select(evt: gr.SelectData, results):
-                """Handle table row selection with error protection"""
-                try:
-                    if evt and evt.index and len(results) > evt.index[0]:
-                        selected_row = results[evt.index[0]]
-                        # Extract and return model details
-                        return selected_row.get('full_model_info', "{}")
-                    return "{}"
-                except Exception as selection_error:
-                    logger.error(f"Selection error: {selection_error}")
-                    return "{}"
-
-            async def run_shodan_scan():
-                """Execute Shodan scan workflow with comprehensive monitoring"""
-                if not is_admin:
-                    return "Error: Administrative access required"

-

-
-                    f"• {instance_count} total instances discovered\n"
-                    f"• {accessible_count} instances with accessible models\n"
-                    f"• {len(unique_values['families'])} unique model families\n"
-                    f"• {len(unique_values['parameter_sizes'])} parameter size variants"
                 )
-
-                logger.info(f"Scan {scan_id} completed successfully")
-                return report
-
-            except Exception as scan_error:
-                logger.error(f"Scan {scan_id} failed: {scan_error}")
-
-                # Generate actionable error message
-                if isinstance(scan_error, ValueError) and "API key" in str(scan_error):
-                    return "Error: Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable."
-                elif isinstance(scan_error, ConnectionError):
-                    return "Error: Network connectivity issue. Please check your internet connection."
-                else:
-                    return f"Error: Scan operation failed - {str(scan_error)}"
-
-            # Connect event handlers to UI components
-            search_button.click(
-                fn=on_search_click,
-                inputs=[family_dropdown, parameter_size_dropdown, name_search],
-                outputs=[results_table]
-            )

-

-            shodan_scan_button.click(
-                fn=run_shodan_scan,
-                inputs=[],
-                outputs=[scan_status]
-            )

-    except Exception as interface_error:
-        logger.critical(f"Interface initialization failed: {interface_error}")
-        raise ValueError(f"Failed to create application interface: {interface_error}") from interface_error
-
-def validate_env_variables():
-    """
-    Centralized validation of critical environment variables with precise error messaging.
-
-    Raises:
-        ValueError: When any required environment variable is missing
-    """
-    required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
-    missing_vars = [var for var in required_vars if not os.getenv(var)]
-
-    if missing_vars:
-        error_msg = f"Missing critical environment variables: {', '.join(missing_vars)}"
-        logger.critical(error_msg)
-        raise ValueError(error_msg)
-
-    # Validate token quality
-    hf_token = os.getenv("HF_TOKEN")
-    if len(hf_token) < 8:  # Minimum length for plausible token
-        logger.warning("HF_TOKEN appears malformed (insufficient length)")

-
-def main():
-    """
-    Application entry point with centralized error handling and environment validation.
-    """
-    try:
-        # Validate environment once at startup
-        validate_env_variables()
-
-        # Initialize and launch interface
-        logger.info("Initializing Gradio interface")
-        interface = create_interface()
-
-        if interface:
-            logger.info("Starting Gradio server")
-            interface.launch()
-        else:
-            logger.critical("Interface initialization failed")
-            sys.exit(1)
-
-    except ValueError as config_error:
-        # Handle configuration errors
-        logger.critical(f"Configuration error: {config_error}")
-        sys.exit(1)
-
-    except Exception as fatal_error:
-        # Handle unexpected errors
-        logger.critical(f"Fatal application error: {fatal_error}")
-        sys.exit(1)

 if __name__ == "__main__":
-    main()
+"""
+Ollama Instance & Model Scanner for Hugging Face Space
+
+This application scans for publicly accessible Ollama instances, retrieves model information,
+and provides a secure interface for browsing discovered models.
+
+Security Architecture:
+- Server-side authorization based on environment variables
+- Strict input sanitization
+- Comprehensive error handling
+- Asynchronous endpoint checking
+- Efficient dataset management
+"""
+
 import os
+import re
+import json
+import asyncio
 import logging
+import gradio as gr
 import shodan
 import aiohttp
+from datasets import load_dataset, Dataset
+from typing import Dict, List, Optional, Any, Tuple, Union
+from datetime import datetime
+from functools import wraps

+# Configure logging
 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler()]
 )
 logger = logging.getLogger(__name__)
+# Security layer - Authorization functions
+
+def authorization_required(func):
     """
+    Decorator that enforces server-side authorization for protected functions.
+    Authorization is determined by environment variables, not client parameters.
+
+    Args:
+        func: The function to protect with authorization

     Returns:
+        A wrapped function that performs the authorization check
+    """
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if not verify_admin_authorization():
+            logger.warning(f"Unauthorized access attempt to {func.__name__}")
+            return {"error": "Unauthorized access"} if kwargs.get("return_error", False) else None
+        return func(*args, **kwargs)
+    return wrapper
+
+def verify_admin_authorization() -> bool:
+    """
+    Perform server-side verification of admin authorization.
+    Authorization is based on environment variables, not client data.
+
+    Returns:
+        bool: True if valid admin credentials exist
+    """
+    try:
+        # Check for the existence of the Shodan API key and HF token
+        api_key = os.getenv("SHODAN_API_KEY")
+        hf_token = os.getenv("HF_TOKEN")
+
+        return (api_key is not None and
+                len(api_key.strip()) > 10 and
+                hf_token is not None and
+                len(hf_token.strip()) > 10)
+    except Exception as e:
+        logger.error(f"Error verifying admin authorization: {str(e)}")
+        return False
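A quick illustration of how the decorator behaves; the function name below is hypothetical and not part of app.py, and the sketch assumes no valid SHODAN_API_KEY/HF_TOKEN is set:

@authorization_required
def wipe_dataset(return_error: bool = False):
    # hypothetical protected operation
    return "dataset wiped"

wipe_dataset()                   # -> None: the call is silently refused
wipe_dataset(return_error=True)  # -> {'error': 'Unauthorized access'}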
+# Security layer - Input validation
+
+def sanitize_input(input_string: str) -> str:
+    """
+    Sanitize user input to prevent injection attacks.
+
+    Args:
+        input_string: User input string to sanitize
+
+    Returns:
+        str: Sanitized string
+    """
+    if not isinstance(input_string, str):
+        return ""
+
+    # Remove potentially harmful characters
+    sanitized = re.sub(r'[^\w\s\-\.]', '', input_string)
+    # Limit length to prevent DoS
+    return sanitized[:100]
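For reference, the sanitizer keeps only word characters, whitespace, hyphens, and dots, and truncates to 100 characters, so the expected behavior is:

sanitize_input("llama3.1 <script>alert(1)</script>")  # -> 'llama3.1 scriptalert1script'
sanitize_input(12345)                                  # -> '' (non-strings are rejected)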
+def get_env_variables() -> Dict[str, str]:
+    """
+    Get all required environment variables.
+
+    Returns:
+        Dict[str, str]: Dictionary containing environment variables

     Raises:
+        ValueError: If any required environment variable is missing
     """
+    env_vars = {
+        "SHODAN_API_KEY": os.getenv("SHODAN_API_KEY"),
+        "SHODAN_QUERY": os.getenv("SHODAN_QUERY", "product:Ollama port:11434"),
+        "HF_TOKEN": os.getenv("HF_TOKEN")
+    }
+
+    missing_vars = [name for name, value in env_vars.items() if not value]
+    if missing_vars:
+        error_msg = f"Missing required environment variables: {', '.join(missing_vars)}"
+        logger.error(error_msg)
+        raise ValueError(error_msg)

+    return env_vars
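Because get_env_variables() raises rather than returning partial config, callers fail fast; a minimal sketch of that behavior (assuming HF_TOKEN is unset):

import os
os.environ.pop("HF_TOKEN", None)  # simulate the missing secret
try:
    get_env_variables()
except ValueError as e:
    print(e)  # Missing required environment variables: HF_TOKEN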
+# Data access layer
+
+def load_or_create_dataset() -> Dataset:
+    """
+    Load the dataset from Hugging Face Hub or create it if it doesn't exist.
+
+    Returns:
+        Dataset: Loaded or created dataset
+
+    Raises:
+        Exception: If dataset loading or creation fails
+    """
     try:
+        # Attempt to get environment variables - this will raise ValueError if missing
+        env_vars = get_env_variables()

+        logger.info("Attempting to load dataset from Hugging Face Hub")
+        dataset = load_dataset("latterworks/llama_checker_results", use_auth_token=env_vars["HF_TOKEN"])
+        dataset = dataset['train']
+        logger.info(f"Successfully loaded dataset with {len(dataset)} entries")
+        return dataset
+    except ValueError:
+        # Re-raise environment variable errors
+        raise
     except FileNotFoundError:
+        # Only create dataset if admin authorization is verified
+        if not verify_admin_authorization():
+            logger.error("Unauthorized attempt to create dataset")
+            raise ValueError("Unauthorized: Only admins can create the dataset")
+
+        logger.info("Dataset not found, creating a new one")
+        env_vars = get_env_variables()
+        dataset = Dataset.from_dict({
             "ip": [],
             "port": [],
             "country": [],
+            "region": [],
             "org": [],
             "models": []
         })
+        dataset.push_to_hub("latterworks/llama_checker_results", token=env_vars["HF_TOKEN"])
+        logger.info("Created and pushed empty dataset to Hugging Face Hub")

+        # Reload the dataset to ensure consistency
+        dataset = load_dataset("latterworks/llama_checker_results", use_auth_token=env_vars["HF_TOKEN"])['train']
+        return dataset
+    except Exception as e:
+        error_msg = f"Failed to load or create dataset: {str(e)}"
+        logger.error(error_msg)
+        raise
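The Hub round trip above can be exercised on its own; a minimal sketch assuming a valid write token (newer releases of datasets prefer token= over the deprecated use_auth_token=):

from datasets import Dataset, load_dataset

empty = Dataset.from_dict({"ip": [], "port": [], "country": [], "region": [], "org": [], "models": []})
empty.push_to_hub("latterworks/llama_checker_results", token="hf_xxx")  # placeholder token
ds = load_dataset("latterworks/llama_checker_results", split="train")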
+async def check_single_endpoint(ip: str, port: int, timeout: int = 5) -> Optional[List[Dict[str, Any]]]:
     """
+    Check a single Ollama endpoint for available models.

     Args:
+        ip: IP address of the Ollama instance
+        port: Port number of the Ollama instance
+        timeout: Timeout in seconds for the HTTP request

     Returns:
+        Optional[List[Dict[str, Any]]]: List of model information dictionaries, or None if the endpoint check fails
     """
     url = f"http://{ip}:{port}/api/tags"

     try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, timeout=timeout) as response:
+                if response.status == 200:
+                    data = await response.json()
+                    if "models" in data and isinstance(data["models"], list):
+                        logger.info(f"Successfully retrieved {len(data['models'])} models from {ip}:{port}")
+                        return data["models"]
+                    else:
+                        logger.warning(f"Unexpected response format from {ip}:{port}")
+                else:
+                    logger.warning(f"Received status code {response.status} from {ip}:{port}")
+    except aiohttp.ClientError as e:
+        logger.warning(f"Connection error for {ip}:{port}: {str(e)}")
     except asyncio.TimeoutError:
+        logger.warning(f"Connection timeout for {ip}:{port}")
     except Exception as e:
+        logger.warning(f"Unexpected error checking {ip}:{port}: {str(e)}")
+
+    return None
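Since check_single_endpoint is a plain coroutine, it can be smoke-tested in isolation; a sketch assuming a reachable local Ollama instance:

import asyncio

models = asyncio.run(check_single_endpoint("127.0.0.1", 11434))
if models:
    print([m.get("name") for m in models])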
+@authorization_required
+async def check_ollama_endpoints(dataset: Dataset, progress: Optional[gr.Progress] = None) -> Dataset:
     """
+    Check all Ollama endpoints in the dataset for available models.
+    Requires admin authorization.

     Args:
+        dataset: Dataset containing Ollama endpoints
+        progress: Optional Gradio progress bar

     Returns:
+        Dataset: Updated dataset with model information
     """
+    if progress:
+        progress(0, desc="Preparing to check endpoints...")
+
+    # Build a list of tasks to execute
+    total_endpoints = len(dataset)
+    tasks = []
+
+    for item in dataset:
+        tasks.append(check_single_endpoint(item["ip"], item["port"]))
+
+    # Execute tasks in batches to avoid overwhelming resources,
+    # writing results back into a mutable copy of the dataset rows
+    batch_size = 10
+    rows = dataset.to_list()
+
+    for i in range(0, len(tasks), batch_size):
+        if progress:
+            progress(i / len(tasks), desc=f"Checking endpoints {i+1}-{min(i+batch_size, len(tasks))} of {len(tasks)}...")
+
+        batch_results = await asyncio.gather(*tasks[i:i+batch_size])
+
+        for j, result in enumerate(batch_results):
+            idx = i + j
+            if idx < len(rows) and result:
+                rows[idx]["models"] = result
+
+    updated_dataset = Dataset.from_list(rows) if rows else dataset

+    if progress:
+        progress(1.0, desc="Endpoint checking complete!")

+    logger.info(f"Checked {total_endpoints} endpoints, found models on {sum(1 for item in updated_dataset if item['models'])} endpoints")

+    # Push updated dataset to Hugging Face Hub
+    env_vars = get_env_variables()
+    updated_dataset.push_to_hub("latterworks/llama_checker_results", token=env_vars["HF_TOKEN"])
+    logger.info("Successfully pushed updated dataset to Hugging Face Hub")
+
+    return updated_dataset
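The slicing loop above is a generic way to bound concurrency without extra machinery; the same pattern in standalone form:

import asyncio

async def run_in_batches(coros, batch_size=10):
    # At most batch_size coroutines are in flight at any one time
    results = []
    for i in range(0, len(coros), batch_size):
        results.extend(await asyncio.gather(*coros[i:i + batch_size]))
    return results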
+@authorization_required
+def scan_shodan(progress: Optional[gr.Progress] = None) -> str:
     """
+    Scan Shodan for Ollama instances and update the dataset.
+    Requires admin authorization.

     Args:
+        progress: Optional Gradio progress bar

     Returns:
+        str: Status message
     """
+    try:
+        # Get environment variables
+        env_vars = get_env_variables()

+        # Load dataset
+        dataset = load_or_create_dataset()

+        # Initialize Shodan API client
+        api = shodan.Shodan(env_vars["SHODAN_API_KEY"])
+        query = env_vars["SHODAN_QUERY"]

+        if progress:
+            progress(0, desc="Starting Shodan search...")

+        # Get total results count
+        count_result = api.count(query)
+        total_results = count_result.get('total', 0)

+        if total_results == 0:
+            return "No Ollama instances found on Shodan."

+        logger.info(f"Found {total_results} potential Ollama instances on Shodan")

+        # Search Shodan
+        new_instances = []
+        results_processed = 0
+
+        for result in api.search_cursor(query):
+            results_processed += 1
+
+            if progress:
+                progress(results_processed / total_results,
+                         desc=f"Processing Shodan result {results_processed}/{total_results}")
+
+            ip = result.get('ip_str')
+            port = result.get('port', 11434)
+
+            # Skip if instance already exists in dataset
+            if any(item["ip"] == ip and item["port"] == port for item in dataset):
+                continue
+
+            # Extract location information
+            country = result.get('location', {}).get('country_name', '')
+            region = result.get('location', {}).get('region_name', '')
+            org = result.get('org', '')
+
+            new_instances.append({
+                "ip": ip,
+                "port": port,
+                "country": country,
+                "region": region,
+                "org": org,
+                "models": []
+            })
+
+        if progress:
+            progress(1.0, desc="Shodan search complete!")
+
+        # Add new instances to dataset (add_item returns a new Dataset)
+        updated_dataset = dataset
+        for instance in new_instances:
+            updated_dataset = updated_dataset.add_item(instance)
+
+        logger.info(f"Added {len(new_instances)} new instances to dataset")
+
+        # Check Ollama endpoints asynchronously
+        if new_instances:
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            updated_dataset = loop.run_until_complete(check_ollama_endpoints(updated_dataset, progress))
+            loop.close()
+
+        status_message = f"Scan complete! Found {len(new_instances)} new Ollama instances."
+        return status_message
+
+    except shodan.APIError as e:
+        error_msg = f"Shodan API error: {str(e)}"
+        logger.error(error_msg)
+        return error_msg
+    except Exception as e:
+        error_msg = f"Error during Shodan scan: {str(e)}"
+        logger.error(error_msg)
+        return error_msg
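search_cursor pages through results transparently, so no page arithmetic is needed; a minimal sketch with a placeholder key:

import shodan

api = shodan.Shodan("YOUR_SHODAN_API_KEY")  # placeholder key
print(api.count("product:Ollama port:11434").get("total", 0))
for i, banner in enumerate(api.search_cursor("product:Ollama port:11434")):
    print(banner.get("ip_str"), banner.get("port"))
    if i >= 4:  # stop after a few results in a demo
        break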
+def get_unique_values(dataset: Dataset, field: str) -> List[str]:
     """
+    Get unique values for a specific field in the dataset.

     Args:
+        dataset: Dataset to extract values from
+        field: Field name to extract values from

     Returns:
+        List[str]: List of unique values
     """
+    unique_values = set()

+    if field in ("family", "parameter_size", "quantization_level"):
+        for item in dataset:
+            models = item.get("models", [])
+            if not models:
+                continue
+
+            for model in models:
+                details = model.get("details", {})
+                if details and field in details:
+                    value = details.get(field)
+                    if value:
+                        unique_values.add(value)
+
+    return sorted(list(unique_values))
+def search_models(dataset: Dataset, name_search: str = "", family: str = "", parameter_size: str = "") -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
     """
     Search for models in the dataset based on filters.
+    Authorization is determined server-side.

     Args:
+        dataset: Dataset to search
+        name_search: Model name search string
+        family: Model family filter
+        parameter_size: Parameter size filter

     Returns:
+        Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]: Filtered model list and detailed model list
     """
+    # Server-side authorization check
+    is_admin = verify_admin_authorization()
+
+    name_search = sanitize_input(name_search).lower()
+    family = sanitize_input(family)
+    parameter_size = sanitize_input(parameter_size)
+
+    filtered_models = []
+    detailed_models = []
+
+    for item in dataset:
+        models = item.get("models", [])
+        if not models:
+            continue

+        ip = item.get("ip", "")
+        port = item.get("port", 0)
+        country = item.get("country", "")
+        region = item.get("region", "")
+        org = item.get("org", "")
+
+        for model in models:
+            model_name = model.get("name", "").lower()
+            details = model.get("details", {})
+            model_family = details.get("family", "")
+            model_parameter_size = details.get("parameter_size", "")
+            model_quantization = details.get("quantization_level", "")
+            model_size = model.get("size", 0)
+            model_size_gb = round(model_size / (1024**3), 2) if model_size else 0

             # Apply filters
+            if name_search and name_search not in model_name:
                 continue
+            if family and family != model_family:
                 continue
+            if parameter_size and parameter_size != model_parameter_size:
                 continue

+            # Prepare filtered model entry
+            filtered_model = {
+                "name": model.get("name", ""),
+                "family": model_family,
+                "parameter_size": model_parameter_size,
+                "quantization_level": model_quantization,
+                "size_gb": model_size_gb
             }

+            # Add IP and port information only for admins - server-side check
+            if is_admin:
+                filtered_model["ip"] = ip
+                filtered_model["port"] = port
+
+            filtered_models.append(filtered_model)

+            # Prepare detailed model entry
+            detailed_model = {
+                "name": model.get("name", ""),
+                "family": model_family,
+                "parameter_size": model_parameter_size,
+                "quantization_level": model_quantization,
+                "size_gb": model_size_gb,
+                "digest": model.get("digest", ""),
+                "modified_at": model.get("modified_at", ""),
+                "country": country,
+                "region": region,
+                "org": org
+            }
+
+            # Add IP and port information only for admins - server-side check
             if is_admin:
+                detailed_model["ip"] = ip
+                detailed_model["port"] = port

+            detailed_models.append(detailed_model)

+    return filtered_models, detailed_models
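A quick usage sketch for the search function; admin-only fields (ip, port) appear only when the server-side check passes:

results, details = search_models(dataset, name_search="llama")
for row in results:
    print(row["name"], row["parameter_size"], row["size_gb"])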
+def create_ui() -> gr.Blocks:
     """
+    Create the Gradio user interface with server-side authorization.

     Returns:
+        gr.Blocks: Gradio interface
     """
+    # Load dataset
     try:
         dataset = load_or_create_dataset()
+    except Exception as e:
+        # Fall back to an empty dataset if loading fails
+        logger.error(f"Failed to load dataset: {str(e)}")
+        dataset = Dataset.from_dict({
+            "ip": [],
+            "port": [],
+            "country": [],
+            "region": [],
+            "org": [],
+            "models": []
+        })
+
+    # Server-side authorization check
+    is_admin = verify_admin_authorization()
+
+    # Get unique values for dropdowns
+    families = [""] + get_unique_values(dataset, "family")
+    parameter_sizes = [""] + get_unique_values(dataset, "parameter_size")
+
+    # Initial search results
+    initial_results, initial_details = search_models(dataset)
+
+    with gr.Blocks(title="Ollama Instance & Model Browser") as app:
+        gr.Markdown("# Ollama Instance & Model Browser")

+        with gr.Tabs() as tabs:
+            with gr.Tab("Browse Models"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        name_search = gr.Textbox(label="Model Name Search")
+                        family_dropdown = gr.Dropdown(
+                            choices=families,
+                            label="Model Family",
+                            value=""
                         )
+                        parameter_size_dropdown = gr.Dropdown(
+                            choices=parameter_sizes,
+                            label="Parameter Size",
+                            value=""
                         )
+                        search_button = gr.Button("Search Models")

+                with gr.Row():
+                    model_results = gr.DataFrame(
+                        value=initial_results,
+                        label="Model Results",
+                        interactive=False
                     )

+                with gr.Row():
+                    model_details = gr.JSON(label="Model Details")

+                def search_callback(name, family, parameter_size):
+                    results, details = search_models(dataset, name, family, parameter_size)
+                    return results, None
+
+                def select_model(evt: gr.SelectData, name, family, parameter_size):
+                    # Re-run the search with the current filter values so the
+                    # selected row index lines up with the detailed list
+                    results, details = search_models(dataset, name, family, parameter_size)
+                    if evt.index[0] < len(details):
+                        return details[evt.index[0]]
+                    return None
+
+                search_button.click(
+                    search_callback,
+                    inputs=[name_search, family_dropdown, parameter_size_dropdown],
+                    outputs=[model_results, model_details]
+                )
+
+                model_results.select(
+                    select_model,
+                    inputs=[name_search, family_dropdown, parameter_size_dropdown],
+                    outputs=model_details
+                )
+
+            # Only show Shodan Scan tab for admins - server-side check
+            if is_admin:
+                with gr.Tab("Shodan Scan"):
+                    gr.Markdown("## Scan for Ollama Instances")
+                    gr.Markdown("**Note:** This scan will update the dataset with new Ollama instances.")
+                    scan_button = gr.Button("Start Scan")
+                    scan_output = gr.Textbox(label="Scan Status")

+                    scan_button.click(
+                        lambda progress=gr.Progress(): scan_shodan(progress),
+                        outputs=scan_output
                     )

+        # Refresh dataset when the app starts
+        def refresh_data():
+            nonlocal dataset
+            try:
+                dataset = load_or_create_dataset()
+            except Exception as e:
+                logger.error(f"Failed to refresh dataset: {str(e)}")
+                # Continue with the existing dataset

+            results, details = search_models(dataset)
+            return results

+        app.load(
+            fn=refresh_data,
+            outputs=model_results
+        )

+    return app
+# Main entry point
 if __name__ == "__main__":
+    try:
+        ui = create_ui()
+        ui.launch()
+    except Exception as e:
+        logger.critical(f"Failed to start application: {str(e)}")