Update app.py
app.py
CHANGED
--- app.py (old)
@@ -1,4 +1,6 @@
 import os
 import logging
 import datasets
 import shodan
@@ -6,39 +8,64 @@ import asyncio
 import aiohttp
 import json
 import gradio as gr
-from typing import List, Dict, Any, Optional
 
-# Configure logging
-logging.basicConfig(
 logger = logging.getLogger(__name__)
 
-def validate_env_variables():
-    """Validate that required environment variables are set."""
-    required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
-    missing_vars = [var for var in required_vars if not os.getenv(var)]
-    if missing_vars:
-        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
-
 def load_or_create_dataset():
-    """
-
     hf_token = os.getenv("HF_TOKEN")
 
     try:
-
-
-
-
-
-
-
     else:
-        #
-        return dataset
 
     except FileNotFoundError:
-        logger.info("Dataset not found
-
         empty_dataset = datasets.Dataset.from_dict({
             "ip": [],
             "port": [],
@@ -48,26 +75,38 @@ def load_or_create_dataset():
             "models": []
         })
 
-
-
-
-
-        )
-
-        # Load the newly created dataset
-        dataset = datasets.load_dataset(
-            "latterworks/llama_checker_results",
-            use_auth_token=hf_token
-        )
-
-        if "train" in dataset:
-            return dataset["train"]
-        else:
-            return dataset[next(iter(dataset))]
 
-
-
-
 
 def scan_shodan(progress=gr.Progress()) -> List[Dict]:
     """
@@ -179,89 +218,179 @@ async def check_single_endpoint(session, instance):
 
 async def check_ollama_endpoints(instances, progress=gr.Progress()):
     """
-
 
     Args:
         instances: List of Ollama instances from Shodan
-        progress: Gradio progress bar
 
     Returns:
-        List of Ollama instances with model information
     """
     if not instances:
         return []
-
-    progress(0, desc="Checking Ollama endpoints")
 
-
-
-
-
-
-
 
-    # Process
     updated_instances = []
-
-
-
-
 
     return updated_instances
 
 def update_dataset_with_instances(dataset, instances):
     """
-
 
     Args:
-        dataset: HuggingFace dataset
         instances: List of Ollama instances with model information
 
     Returns:
-        Updated HuggingFace dataset
     """
     if not instances:
-        logger.warning("No
         return dataset
-
-    # Convert dataset to list of dictionaries for easier manipulation
-    dataset_dict = {f"{item['ip']}:{item['port']}": item for item in dataset.to_list()}
 
-
-
     new_instances = []
 
     for instance in instances:
         instance_key = f"{instance['ip']}:{instance['port']}"
 
         if instance_key in dataset_dict:
-            #
-
-
-
 
-            #
             if instance.get('models'):
-
 
-
         else:
-            #
             new_instances.append(instance)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
 
 def get_unique_values(dataset):
     """
@@ -377,150 +506,296 @@ def search_models(dataset, family=None, parameter_size=None, name_search=None, i
     return results
 
 def create_interface():
-    """
     try:
-        #
         dataset = load_or_create_dataset()
 
-        #
         unique_values = get_unique_values(dataset)
 
-        #
         initial_results = search_models(dataset)
 
-        # Create
-        with gr.Blocks(
-
-            gr.
 
         with gr.Tabs() as tabs:
-            #
             with gr.TabItem("Browse Models"):
                 with gr.Row():
                     with gr.Column(scale=1):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
                     with gr.Row():
                         results_table = gr.DataFrame(
                             value=initial_results,
                             headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
-                            label="
                         )
 
                     with gr.Row():
-
 
-            # Shodan
-            with gr.TabItem("Shodan Scan
-                gr.
-
-
-
-
 
-            # Define event handlers
             def on_search_click(family, parameter_size, name_search):
-
-
-
-
-
-
-
-
-
-
-
-
-
 
             def on_table_select(evt: gr.SelectData, results):
-
-
-
-
 
             async def run_shodan_scan():
                 try:
-                    #
-                    if not os.getenv("SHODAN_API_KEY"):
-                        return "Error: SHODAN_API_KEY environment variable is not set."
-
-                    # Perform Shodan scan
                     instances = scan_shodan()
-
                     if not instances:
-                        return "No Ollama instances found
 
-
                     updated_instances = await check_ollama_endpoints(instances)
 
-                    #
                     nonlocal dataset
                     dataset = update_dataset_with_instances(dataset, updated_instances)
 
-                    #
                     nonlocal unique_values
                     unique_values = get_unique_values(dataset)
 
-                    # Update
                     family_dropdown.choices = ["All"] + unique_values['families']
                     parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']
 
-
-
-
-
 
-            # Connect event handlers
             search_button.click(
-                on_search_click,
                 inputs=[family_dropdown, parameter_size_dropdown, name_search],
                 outputs=[results_table]
             )
 
             results_table.select(
-                on_table_select,
                 inputs=[results_table],
                 outputs=[model_details]
             )
 
             shodan_scan_button.click(
-                run_shodan_scan,
                 inputs=[],
                 outputs=[scan_status]
             )
 
             return interface
 
-    except Exception as
-        logger.
-        raise
 
 def main():
-    """
     try:
         interface = create_interface()
         if interface:
             interface.launch()
         else:
-            logger.
-
-
 
 if __name__ == "__main__":
     main()
+++ app.py (new)
@@ -1,4 +1,6 @@
 import os
+import sys
+import time
 import logging
 import datasets
 import shodan
@@ -6,39 +8,64 @@ import asyncio
 import aiohttp
 import json
 import gradio as gr
+from typing import List, Dict, Any, Optional, Tuple, Set, Union
+from concurrent.futures import ThreadPoolExecutor
 
+# Configure production-grade logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s [%(filename)s:%(lineno)d] - %(message)s',
+    handlers=[
+        logging.StreamHandler(),
+        logging.FileHandler("ollama_scanner.log")
+    ]
+)
 logger = logging.getLogger(__name__)
 
 def load_or_create_dataset():
+    """
+    Load dataset from HuggingFace with optimized error handling and authentication.
+
+    Returns:
+        Dataset: The loaded dataset object ready for query operations
+
+    Raises:
+        ValueError: When authentication fails or dataset structure is invalid
+        ConnectionError: When network issues prevent dataset access
+    """
+    # HF token must exist for private dataset access
     hf_token = os.getenv("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN environment variable missing or empty - authentication required")
+
+    dataset_id = "latterworks/llama_checker_results"
+    logger.info(f"Initializing dataset access: {dataset_id}")
 
     try:
+        # First attempt: Try modern token parameter
+        try:
+            dataset = datasets.load_dataset(dataset_id, token=hf_token)
+        except TypeError:
+            # Fallback: Use legacy authentication parameter
+            logger.info("Attempting legacy authentication method")
+            dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+        # Extract the appropriate split
+        if isinstance(dataset, datasets.DatasetDict):
+            if "train" in dataset:
+                return dataset["train"]
+            # No train split found, use first available
+            first_split = next(iter(dataset))
+            logger.info(f"No 'train' split found, using '{first_split}' split")
+            return dataset[first_split]
         else:
+            # Handle direct Dataset object (no splits)
+            return dataset
 
     except FileNotFoundError:
+        logger.info(f"Dataset {dataset_id} not found - creating new dataset")
+
+        # Prepare empty dataset with precise schema
         empty_dataset = datasets.Dataset.from_dict({
             "ip": [],
             "port": [],
@@ -48,26 +75,38 @@ def load_or_create_dataset():
             "models": []
         })
 
+        try:
+            # Create dataset on Hub with correct token parameter
+            empty_dataset.push_to_hub(dataset_id, token=hf_token)
+            logger.info(f"Successfully created empty dataset: {dataset_id}")
 
+            # Load the newly created dataset
+            try:
+                dataset = datasets.load_dataset(dataset_id, token=hf_token)
+            except TypeError:
+                dataset = datasets.load_dataset(dataset_id, use_auth_token=hf_token)
+
+            # Extract appropriate split
+            if isinstance(dataset, datasets.DatasetDict):
+                if "train" in dataset:
+                    return dataset["train"]
+                first_split = next(iter(dataset))
+                logger.info(f"Using '{first_split}' split from newly created dataset")
+                return dataset[first_split]
+            else:
+                return dataset
+
+        except Exception as creation_error:
+            logger.error(f"Dataset creation failed: {creation_error}")
+            raise ValueError(f"Failed to create dataset: {creation_error}") from creation_error
+
+    except (ConnectionError, TimeoutError) as network_error:
+        logger.error(f"Network error accessing dataset: {network_error}")
+        raise ConnectionError(f"Network failure accessing HuggingFace Hub: {network_error}") from network_error
+
+    except Exception as general_error:
+        logger.error(f"Unexpected error accessing dataset: {general_error}")
+        raise ValueError(f"Dataset access failed: {general_error}") from general_error
 
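A note on the authentication fallback above: newer releases of the `datasets` library accept a `token` argument, while older ones only accept `use_auth_token` and raise TypeError when given `token`. A minimal standalone sketch of the same pattern (the dataset id in the usage comment is a placeholder):

import datasets

def load_private_dataset(dataset_id: str, hf_token: str):
    # Newer datasets versions take `token`; older ones only `use_auth_token`.
    try:
        return datasets.load_dataset(dataset_id, token=hf_token)
    except TypeError:
        return datasets.load_dataset(dataset_id, use_auth_token=hf_token)

# load_private_dataset("your-org/your-dataset", os.environ["HF_TOKEN"])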
 def scan_shodan(progress=gr.Progress()) -> List[Dict]:
     """
@@ -179,89 +218,179 @@ async def check_single_endpoint(session, instance):
 
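scan_shodan's body is collapsed in this view, so the following is only a hedged sketch of what a Shodan query for Ollama hosts typically looks like with the official client; the query string and field mapping are assumptions, not taken from app.py:

import os
import shodan

def find_ollama_hosts(limit: int = 100) -> list:
    api = shodan.Shodan(os.environ["SHODAN_API_KEY"])
    results = api.search('product:"Ollama"', limit=limit)  # assumed query string
    return [
        {"ip": match["ip_str"], "port": match["port"]}  # standard Shodan match fields
        for match in results.get("matches", [])
    ]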
 async def check_ollama_endpoints(instances, progress=gr.Progress()):
     """
+    Efficiently check multiple Ollama endpoints with concurrent processing and comprehensive error handling.
 
     Args:
         instances: List of Ollama instances from Shodan
+        progress: Gradio progress bar for visual feedback
 
     Returns:
+        List of Ollama instances with enriched model information
     """
     if not instances:
+        logger.info("No instances to check - skipping endpoint verification")
         return []
 
+    total_instances = len(instances)
+    logger.info(f"Initiating concurrent validation of {total_instances} Ollama endpoints")
+    progress(0, desc=f"Preparing to check {total_instances} Ollama endpoints")
+
+    # Configure optimized session with connection pooling and timeouts
+    conn = aiohttp.TCPConnector(limit=50, ttl_dns_cache=300)
+    timeout = aiohttp.ClientTimeout(total=30, connect=5, sock_connect=5, sock_read=20)
+
+    async with aiohttp.ClientSession(connector=conn, timeout=timeout) as session:
+        # Create task queue
+        tasks = [check_single_endpoint(session, instance) for instance in instances]
 
+        # Process with dynamic progress tracking
         updated_instances = []
+        completed = 0
+
+        for future in asyncio.as_completed(tasks):
+            try:
+                # Process completed task
+                instance = await future
+                updated_instances.append(instance)
+
+                # Update progress with meaningful metrics
+                completed += 1
+                progress_pct = completed / total_instances
+                progress(progress_pct, desc=f"Checked {completed}/{total_instances} endpoints ({progress_pct:.1%})")
+
+                # Log models found
+                if instance.get('models'):
+                    logger.info(f"Found {len(instance['models'])} models at {instance['ip']}:{instance['port']}")
+
+            except Exception as task_error:
+                # Handle per-task errors without stopping the process
+                logger.warning(f"Endpoint check failed: {task_error}")
+                # Continue processing remaining endpoints
 
+    valid_instances = [i for i in updated_instances if i.get('models')]
+    logger.info(f"Endpoint validation complete: {len(valid_instances)}/{total_instances} accessible")
     return updated_instances
 
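One behavioral detail of the loop above worth noting: asyncio.as_completed yields futures in completion order, so updated_instances ends up ordered by finish time rather than by input order. A self-contained sketch of the same progress-tracking pattern, with a stand-in work coroutine:

import asyncio

async def work(n: int) -> int:
    await asyncio.sleep(0.01 * (n % 3))  # simulate variable endpoint latency
    return n * n

async def run_all(count: int) -> list:
    tasks = [work(n) for n in range(count)]
    results, completed = [], 0
    for future in asyncio.as_completed(tasks):
        results.append(await future)  # arrives in completion order
        completed += 1
        print(f"checked {completed}/{count} ({completed / count:.1%})")
    return results

# asyncio.run(run_all(10))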
 def update_dataset_with_instances(dataset, instances):
     """
+    Efficiently update HuggingFace dataset with optimized delta synchronization.
+
+    Implements single-pass dataset updates with:
+    1. Optimized in-memory index of existing entries
+    2. Differential detection of new vs. modified instances
+    3. Single hub push with consolidated changes
 
     Args:
+        dataset: HuggingFace dataset object to update
         instances: List of Ollama instances with model information
 
     Returns:
+        Updated HuggingFace dataset with synchronized changes
     """
     if not instances:
+        logger.warning("No instance data provided for dataset update operation")
         return dataset
 
+    start_time = time.time()
+
+    # Optimization: Create indexed lookup of existing instances for O(1) access
+    dataset_dict = {}
+    for idx, item in enumerate(dataset):
+        key = f"{item['ip']}:{item['port']}"
+        dataset_dict[key] = {
+            'idx': idx,
+            'data': item
+        }
+
+    # Track modification metrics
+    stats = {
+        'new': 0,
+        'updated': 0,
+        'unchanged': 0,
+        'models_added': 0
+    }
+
+    # Process differentials
+    update_candidates = []
     new_instances = []
 
     for instance in instances:
+        # Skip instances without valid IP
+        if not instance.get('ip'):
+            continue
+
         instance_key = f"{instance['ip']}:{instance['port']}"
 
         if instance_key in dataset_dict:
+            # Existing instance - determine if update needed
+            existing = dataset_dict[instance_key]['data']
+            needs_update = False
+
+            # Check metadata changes
+            for field in ['country', 'region', 'org']:
+                if instance.get(field) and instance.get(field) != existing.get(field):
+                    needs_update = True
 
+            # Check model changes - only update if models were found
             if instance.get('models'):
+                # Compare model signatures to detect changes
+                existing_models = {model.get('name', ''): model for model in existing.get('models', [])}
+                new_models = {model.get('name', ''): model for model in instance.get('models', [])}
+
+                if set(new_models.keys()) != set(existing_models.keys()):
+                    needs_update = True
+                    stats['models_added'] += len(set(new_models.keys()) - set(existing_models.keys()))
 
+            if needs_update:
+                # Create updated instance
+                updated = dict(existing)
+                updated.update({
+                    'country': instance.get('country', existing.get('country')),
+                    'region': instance.get('region', existing.get('region')),
+                    'org': instance.get('org', existing.get('org')),
+                })
+
+                # Only update models if they were found
+                if instance.get('models'):
+                    updated['models'] = instance['models']
+
+                update_candidates.append(updated)
+                stats['updated'] += 1
+            else:
+                stats['unchanged'] += 1
         else:
+            # New instance
             new_instances.append(instance)
+            stats['new'] += 1
 
+    # Efficiently construct updated dataset
+    if new_instances or update_candidates:
+        # Start with current dataset
+        current_data = dataset.to_list()
+
+        # Apply updates
+        for updated in update_candidates:
+            instance_key = f"{updated['ip']}:{updated['port']}"
+            idx = dataset_dict[instance_key]['idx']
+            current_data[idx] = updated
+
+        # Add new instances
+        current_data.extend(new_instances)
+
+        # Create updated dataset
+        updated_dataset = datasets.Dataset.from_list(current_data)
+
+        # Push to hub with single operation
+        hf_token = os.getenv("HF_TOKEN")
+        updated_dataset.push_to_hub("latterworks/llama_checker_results", token=hf_token)
+
+        execution_time = time.time() - start_time
+        logger.info(f"Dataset synchronization complete in {execution_time:.2f}s: {stats['new']} new, {stats['updated']} updated, {stats['unchanged']} unchanged, {stats['models_added']} new models")
+
+        return updated_dataset
+    else:
+        logger.info("No dataset changes detected - skipping hub synchronization")
+        return dataset
 
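Stripped of the datasets plumbing, the delta logic above reduces to: index existing rows by an "ip:port" key, rewrite rows whose metadata or model list changed, and append unseen endpoints. A toy version of that merge (plain dicts, no Hub push):

def merge_rows(existing: list, incoming: list) -> list:
    # O(1) lookup of current rows by endpoint key
    index = {f"{row['ip']}:{row['port']}": i for i, row in enumerate(existing)}
    merged = list(existing)
    for row in incoming:
        key = f"{row['ip']}:{row['port']}"
        if key in index:
            merged[index[key]] = {**merged[index[key]], **row}  # overwrite changed fields
        else:
            merged.append(row)  # previously unseen endpoint
    return merged

# merge_rows([{"ip": "1.2.3.4", "port": 11434, "org": "old"}],
#            [{"ip": "1.2.3.4", "port": 11434, "org": "new"}])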
 def get_unique_values(dataset):
     """
@@ -377,150 +506,296 @@ def search_models(dataset, family=None, parameter_size=None, name_search=None, i
     return results
 
 def create_interface():
+    """
+    Create enterprise-grade Gradio interface with optimized data loading and admin authentication.
+
+    Returns:
+        gr.Blocks: Fully configured Gradio interface ready for deployment
+    """
+    # Administrative authentication function
+    def validate_admin():
+        """Check if current user has admin privileges based on API key"""
+        # For production systems, this would use proper authentication
+        # Currently using API key presence as simple auth mechanism
+        admin_key = os.getenv("ADMIN_KEY", "")
+        shodan_key = os.getenv("SHODAN_API_KEY", "")
+        return bool(admin_key and shodan_key)
+
     try:
+        # Initialize critical data structures once at startup
+        logger.info("Initializing application data layer")
         dataset = load_or_create_dataset()
 
+        # Extract model metadata attributes for filtering
         unique_values = get_unique_values(dataset)
+        logger.info(f"Loaded dataset with {len(unique_values['families'])} model families and {len(unique_values['parameter_sizes'])} parameter sizes")
 
+        # Preload initial model data
         initial_results = search_models(dataset)
+        logger.info(f"Preloaded {len(initial_results)} models for initial display")
+
+        # Determine administrative access
+        is_admin = validate_admin()
+        admin_status = "enabled" if is_admin else "disabled"
+        logger.info(f"Administrative access: {admin_status}")
 
+        # Create interface with optimized structure
+        with gr.Blocks(
+            title="Ollama Instance Scanner",
+            theme=gr.themes.Soft(),
+            css=".footer {text-align: center; margin-top: 20px; color: #666;}"
+        ) as interface:
+            # Header section
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("# Ollama Instance Scanner")
+                    gr.Markdown("Browse publicly accessible Ollama models and their capabilities")
 
+            # Tab container
             with gr.Tabs() as tabs:
+                # Tab 1: Model Browser (Public)
                 with gr.TabItem("Browse Models"):
                     with gr.Row():
+                        # Filter controls
                        with gr.Column(scale=1):
+                            with gr.Box():
+                                gr.Markdown("### Search Filters")
+                                family_dropdown = gr.Dropdown(
+                                    choices=["All"] + unique_values['families'],
+                                    value="All",
+                                    label="Model Family",
+                                    interactive=True
+                                )
+                                parameter_size_dropdown = gr.Dropdown(
+                                    choices=["All"] + unique_values['parameter_sizes'],
+                                    value="All",
+                                    label="Parameter Size",
+                                    interactive=True
+                                )
+                                name_search = gr.Textbox(
+                                    label="Model Name",
+                                    placeholder="Enter model name...",
+                                    interactive=True
+                                )
+                                search_button = gr.Button("Search Models", variant="primary")
 
+                    # Results section
                     with gr.Row():
+                        # Model results table
                         results_table = gr.DataFrame(
                             value=initial_results,
                             headers=["name", "family", "parameter_size", "quantization_level", "size_gb", "country", "region", "org"],
+                            label="Available Models",
+                            interactive=False,
+                            wrap=True
                         )
 
+                    # Details section
                     with gr.Row():
+                        # Model specifications panel
+                        model_details = gr.JSON(
+                            label="Model Specifications",
+                            visible=True
+                        )
 
+                # Tab 2: Shodan Scanner (Admin Only)
+                with gr.TabItem("Shodan Scan", visible=is_admin):
+                    with gr.Box():
+                        gr.Markdown("## Ollama Instance Scanner")
+                        gr.Markdown("This tool scans for publicly accessible Ollama instances using Shodan API")
+
+                        # Scanner controls
+                        with gr.Row():
+                            shodan_scan_button = gr.Button(
+                                "Start Shodan Scan",
+                                variant="primary",
+                                interactive=is_admin
+                            )
+
+                        # Status display
+                        with gr.Row():
+                            scan_status = gr.Textbox(
+                                label="Scan Status",
+                                value="Ready to scan" if is_admin else "Admin access required",
+                                interactive=False
+                            )
+
+            # Footer
+            gr.Markdown(
+                "### Ollama Instance Scanner | Powered by Shodan & Hugging Face",
+                elem_classes=["footer"]
+            )
 
+            # Define optimized event handlers
             def on_search_click(family, parameter_size, name_search):
+                """Process model search with optimized filtering"""
+                try:
+                    # Apply filters
+                    family_filter = None if family == "All" else family
+                    param_size_filter = None if parameter_size == "All" else parameter_size
+                    name_filter = None if not name_search else name_search.strip()
+
+                    # Execute search with admin privileges if available
+                    results = search_models(
+                        dataset,
+                        family_filter,
+                        param_size_filter,
+                        name_filter,
+                        is_admin
+                    )
+
+                    logger.info(f"Search completed: {len(results)} models found matching criteria")
+                    return results
+                except Exception as search_error:
+                    logger.error(f"Search failed: {search_error}")
+                    # Return empty results on error
+                    return []
 
             def on_table_select(evt: gr.SelectData, results):
+                """Handle table row selection with error protection"""
+                try:
+                    if evt and evt.index and len(results) > evt.index[0]:
+                        selected_row = results[evt.index[0]]
+                        # Extract and return model details
+                        return selected_row.get('full_model_info', "{}")
+                    return "{}"
+                except Exception as selection_error:
+                    logger.error(f"Selection error: {selection_error}")
+                    return "{}"
 
             async def run_shodan_scan():
+                """Execute Shodan scan workflow with comprehensive monitoring"""
+                if not is_admin:
+                    return "Error: Administrative access required"
+
+                scan_id = int(time.time())  # Generate unique scan identifier
+                logger.info(f"Initiating Shodan scan {scan_id}")
+
                 try:
+                    # Phase 1: Shodan API scan
                     instances = scan_shodan()
                     if not instances:
+                        return "Scan complete: No Ollama instances found"
 
+                    instance_count = len(instances)
+                    logger.info(f"Scan {scan_id}: Found {instance_count} potential Ollama instances")
+
+                    # Phase 2: Endpoint validation
                     updated_instances = await check_ollama_endpoints(instances)
+                    accessible_count = sum(1 for i in updated_instances if i.get('models'))
+                    logger.info(f"Scan {scan_id}: Validated {accessible_count} accessible instances")
 
+                    # Phase 3: Dataset synchronization
                     nonlocal dataset
                     dataset = update_dataset_with_instances(dataset, updated_instances)
 
+                    # Phase 4: Interface update
                     nonlocal unique_values
                     unique_values = get_unique_values(dataset)
 
+                    # Update UI components with new data
                     family_dropdown.choices = ["All"] + unique_values['families']
                     parameter_size_dropdown.choices = ["All"] + unique_values['parameter_sizes']
 
+                    # Build detailed completion report
+                    report = (
+                        f"Scan {scan_id} completed successfully:\n"
+                        f"• {instance_count} total instances discovered\n"
+                        f"• {accessible_count} instances with accessible models\n"
+                        f"• {len(unique_values['families'])} unique model families\n"
+                        f"• {len(unique_values['parameter_sizes'])} parameter size variants"
+                    )
+
+                    logger.info(f"Scan {scan_id} completed successfully")
+                    return report
+
+                except Exception as scan_error:
+                    logger.error(f"Scan {scan_id} failed: {scan_error}")
+
+                    # Generate actionable error message
+                    if isinstance(scan_error, ValueError) and "API key" in str(scan_error):
+                        return "Error: Invalid Shodan API key. Please check your SHODAN_API_KEY environment variable."
+                    elif isinstance(scan_error, ConnectionError):
+                        return "Error: Network connectivity issue. Please check your internet connection."
+                    else:
+                        return f"Error: Scan operation failed - {str(scan_error)}"
 
+            # Connect event handlers to UI components
             search_button.click(
+                fn=on_search_click,
                 inputs=[family_dropdown, parameter_size_dropdown, name_search],
                 outputs=[results_table]
             )
 
             results_table.select(
+                fn=on_table_select,
                 inputs=[results_table],
                 outputs=[model_details]
             )
 
             shodan_scan_button.click(
+                fn=run_shodan_scan,
                 inputs=[],
                 outputs=[scan_status]
             )
 
+        logger.info("Gradio interface successfully initialized")
         return interface
 
+    except Exception as interface_error:
+        logger.critical(f"Interface initialization failed: {interface_error}")
+        raise ValueError(f"Failed to create application interface: {interface_error}") from interface_error
+
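The handler wiring above follows Gradio's standard component.click(fn=..., inputs=[...], outputs=[...]) pattern; a bare-bones sketch of the same wiring is below. One caveat: assigning to family_dropdown.choices inside run_shodan_scan mutates the server-side component object, but a live Gradio page generally only reflects choice changes that are returned from an event handler (for example via gr.update(choices=...)).

import gradio as gr

def greet(name: str) -> str:
    return f"Hello, {name or 'stranger'}!"

with gr.Blocks(title="Wiring demo") as demo:
    name_box = gr.Textbox(label="Name")
    greet_button = gr.Button("Greet", variant="primary")
    output_box = gr.Textbox(label="Greeting", interactive=False)
    # Same click wiring used for the search and scan buttons above
    greet_button.click(fn=greet, inputs=[name_box], outputs=[output_box])

# demo.launch()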
+def validate_env_variables():
+    """
+    Centralized validation of critical environment variables with precise error messaging.
+
+    Raises:
+        ValueError: When any required environment variable is missing
+    """
+    required_vars = ["SHODAN_API_KEY", "HF_TOKEN"]
+    missing_vars = [var for var in required_vars if not os.getenv(var)]
+
+    if missing_vars:
+        error_msg = f"Missing critical environment variables: {', '.join(missing_vars)}"
+        logger.critical(error_msg)
+        raise ValueError(error_msg)
+
+    # Validate token quality
+    hf_token = os.getenv("HF_TOKEN")
+    if len(hf_token) < 8:  # Minimum length for plausible token
+        logger.warning("HF_TOKEN appears malformed (insufficient length)")
+
+    logger.info("Environment validation successful - all required variables present")
 
 def main():
+    """
+    Application entry point with centralized error handling and environment validation.
+    """
     try:
+        # Validate environment once at startup
+        validate_env_variables()
+
+        # Initialize and launch interface
+        logger.info("Initializing Gradio interface")
         interface = create_interface()
+
         if interface:
+            logger.info("Starting Gradio server")
             interface.launch()
         else:
+            logger.critical("Interface initialization failed")
+            sys.exit(1)
+
+    except ValueError as config_error:
+        # Handle configuration errors
+        logger.critical(f"Configuration error: {config_error}")
+        sys.exit(1)
+
+    except Exception as fatal_error:
+        # Handle unexpected errors
+        logger.critical(f"Fatal application error: {fatal_error}")
+        sys.exit(1)
 
 if __name__ == "__main__":
     main()
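A quick way to see the fail-fast behavior of validate_env_variables, assuming this file is saved as app.py and importable (a hypothetical smoke test, not part of the commit):

import os
import app  # assumes this module is app.py

os.environ.pop("SHODAN_API_KEY", None)
try:
    app.validate_env_variables()
except ValueError as err:
    print(err)  # e.g. "Missing critical environment variables: SHODAN_API_KEY"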
|