Update app.py
app.py CHANGED
@@ -1,564 +1,532 @@
import os
import logging
-import
import bcrypt
-import
-from huggingface_hub import HfApi, login
-from datasets import load_dataset, Dataset, Features, Value, Sequence
-from typing import Dict, List, Optional, Any
-import time
-from concurrent.futures import ThreadPoolExecutor, as_completed
import shodan
-import

-#
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

-#
-        return secrets.get_secret(name)
-    except Exception as e:
-        logger.warning(f"Error accessing secret {name}: {e}")
-        return ""
-
-# Sanitize inputs
-def sanitize_input(text: str) -> str:
-    """Sanitize user input to prevent injection attacks."""
-    if not text:
-        return ""
-    return html.escape(text)

# Dataset functions
-def get_or_create_dataset(
    """

    Args:
-        token: Authentication token for private datasets

    Returns:
-        The dataset
    """
    try:
        # Try to load the dataset
-        return dataset["train"]
-    except Exception as e:
-        logger.info(f"Dataset {repo_id} not found or error loading it: {e}")
-
-        # Create a new dataset with the required schema
-        features = Features({
-            "ip": Value("string"),
-            "port": Value("int32"),
-            "country": Value("string", default_value=""),
-            "region": Value("string", default_value=""),
-            "org": Value("string", default_value=""),
-            "models": Sequence({
-                "name": Value("string"),
-                "family": Value("string", default_value=""),
-                "parameter_size": Value("string", default_value=""),
-                "quantization_level": Value("string", default_value=""),
-                "digest": Value("string", default_value=""),
-                "modified_at": Value("string", default_value=""),
-                "size": Value("int64", default_value=0)
-            }, default_value=[])
-        })
-
-        # Create empty dataset
-        empty_dataset = Dataset.from_dict({
-            "ip": [],
-            "port": [],
-            "country": [],
-            "region": [],
-            "org": [],
-            "models": []
-        }, features=features)

-def
    """

    Args:
        dataset: The dataset to update

    Returns:
-        The updated dataset
    """
-    # Find the index if it exists
-    existing_idx = None
-    for idx, item in enumerate(dataset):
-        if item["ip"] == ip and item["port"] == port:
-            existing_idx = idx
-            break
-
-    if existing_idx is not None:
-        # Update existing entry
-        dataset = dataset.select(list(range(len(dataset))))
-        new_examples = list(dataset)
-        new_examples[existing_idx] = entry
-        return Dataset.from_dict({k: [ex[k] for ex in new_examples] for k in dataset.column_names})
-    else:
-        # Add new entry
-        new_dataset = dataset.add_item(entry)
-        return new_dataset
-
-def push_dataset_to_hub(dataset: Dataset, repo_id: str, token: Optional[str] = None):
-    """
-    Push dataset to the Hugging Face Hub.

-        repo_id: The repository ID
-        token: Authentication token
-    """
-    try:
-        dataset.push_to_hub(repo_id, token=token)
-        logger.info(f"Successfully pushed dataset to {repo_id}")
-    except Exception as e:
-        logger.error(f"Error pushing dataset to hub: {e}")
-
-# Shodan functions
-def scan_with_shodan(shodan_api_key: str, query: str, max_results: int = 1000) -> List[Dict[str, Any]]:
-    """
-    Scan with Shodan API for Ollama instances.

-        # Get the number of total results
-        count_result = api.count(query)
-        total_results = count_result['total']
-        logger.info(f"Found {total_results} results for query: {query}")
-
-        # Limit to max_results
-        pages = min(total_results, max_results) // 100
-        if min(total_results, max_results) % 100 > 0:
-            pages += 1
-
-        for page in range(1, pages + 1):
-            try:
-                result_page = api.search(query, page=page)
-                for match in result_page['matches']:
-                    instance = {
-                        "ip": match.get("ip_str", ""),
-                        "port": match.get("port", 11434),  # Default Ollama port
-                        "country": match.get("location", {}).get("country_name", ""),
-                        "region": match.get("location", {}).get("region_name", ""),
-                        "org": match.get("org", ""),
-                        "models": []  # Will be populated later
-                    }
-                    results.append(instance)
-                logger.info(f"Processed page {page}/{pages}")
-            except shodan.APIError as e:
-                logger.error(f"Shodan API error on page {page}: {e}")
                break

# Ollama endpoint checking
-def check_ollama_endpoint(
    """
-    Check

    Args:

    Returns:
    """
-    ip = instance["ip"]
-    port = instance["port"]
    url = f"http://{ip}:{port}/api/tags"

    try:
-        response = requests.get(url, timeout=
        response.raise_for_status()

        data = response.json()
        if "models" in data:
-            # Extract model details
                model_info = {
-                    "name":
-                    "family":
-                    "parameter_size":
-                    "quantization_level":
-                    "digest":
-                    "modified_at":
-                    "size":
                }
-            updated_instance["models"] = models_list
-            logger.info(f"Successfully extracted {len(models_list)} models from {ip}:{port}")
-        else:
-            logger.warning(f"No models found in response from {ip}:{port}")
-            updated_instance["models"] = []
-
    except requests.exceptions.RequestException as e:
-        logger.error(f"Network error
    except ValueError as e:
        logger.error(f"Invalid JSON from {ip}:{port}: {e}")
    except Exception as e:
-        logger.exception(f"Unexpected error

-    return

-def verify_password(password: str, stored_password: str) -> bool:
    """

    Args:
-        stored_password: The stored password (hashed or plaintext)

    Returns:
    """

-#
-def
    """

    Returns:
    """
-    shodan_query = get_secret("SHODAN_QUERY")
-    if not shodan_query:
-        shodan_query = "product:Ollama port:11434"
-        logger.info(f"Using default Shodan query: {shodan_query}")
-
-    hf_token = get_secret("HF_TOKEN")
-
-    # Load dataset
-    dataset_repo_id = "latterworks/llama_checker_results"
-    dataset = get_or_create_dataset(dataset_repo_id, token=hf_token)
-
-    # Function to search and display models
-    def search_models(family, param_size, name, current_dataset):
-        # Sanitize inputs
-        name = sanitize_input(name)
-
-        results = []

-                    continue
-
-                # Calculate size in GB
-                size_gb = round(model["size"] / (1024 * 1024 * 1024), 2) if model["size"] else 0
-
-                # Add to results
-                results.append([
-                    model["name"],
-                    model["family"],
-                    model["parameter_size"],
-                    model["quantization_level"],
-                    size_gb
-                ])

-        return

-        selected_row = evt.index[0]
-        model_name = results[selected_row][0]

-                    details["org"] = item["org"]
-
-        return details

-        yield f"🔍 Found {len(instances)} instances. Checking endpoints...", [], current_dataset
-
-        # Check endpoints using executor
-        updated_instances = []
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            # Create future tasks
-            future_to_instance = {
-                executor.submit(check_ollama_endpoint, instance): instance
-                for instance in instances
-            }

-        # Update dataset
-        updated_dataset = current_dataset
-        for instance in updated_instances:
-            updated_dataset = update_dataset_entry(updated_dataset, instance)
-
-        # Push to hub
-        push_dataset_to_hub(updated_dataset, dataset_repo_id, token=hf_token)
-
-        # Prepare results for display
-        results = []
-        total_models = 0
-        for instance in updated_instances:
-            models_count = len(instance["models"]) if instance["models"] else 0
-            total_models += models_count
-            results.append([
-                instance["ip"],
-                instance["port"],
-                instance["country"],
-                instance["region"],
-                instance["org"],
-                models_count
-            ])
-
-        yield f"✅ Scan completed! Found {len(instances)} instances with a total of {total_models} models.", results, updated_dataset
-
-    except Exception as e:
-        logger.exception(f"Error during scan: {e}")
-        yield f"❌ Error during scan: {str(e)}", [], current_dataset

-#
-            outputs=[results_df]
        )
        )

            value="",
-            label="Admin Password"
        )
-        login_btn = gr.Button("Login")
-        login_status = gr.Markdown("Not logged in")

-        def
-            inputs=[
-            outputs=[
        )

-        with gr.
-            step=10,
-            label="Max Results"
        )

-#
-            outputs=[admin_required, scan_group]
        )

-            inputs=[
-            outputs=[
        )

    return app

-#
-def main():
-    app = create_ui()
-    app.launch()
-
if __name__ == "__main__":
-
import os
import logging
+import asyncio
import bcrypt
+import requests
import shodan
+import gradio as gr
+from typing import List, Dict, Any, Tuple, Optional
+from datasets import load_dataset, Dataset
+from huggingface_hub import HfApi, create_repo

+# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

+# Environment variable validation
+required_env_vars = ["SHODAN_API_KEY", "ADMIN_PASSWORD", "SHODAN_QUERY"]
+for var in required_env_vars:
+    if not os.environ.get(var):
+        logger.warning(f"Environment variable {var} is not set")

# Dataset functions
+def get_or_create_dataset(dataset_name: str = "latterworks/llama_checker_results") -> Optional[Dataset]:
    """
+    Load the dataset or create it if it doesn't exist.

    Args:
+        dataset_name: The name of the dataset on Hugging Face Hub

    Returns:
+        The dataset or None if there was an error
    """
    try:
        # Try to load the dataset
+        token = os.environ.get("HF_TOKEN")

+        try:
+            dataset_dict = load_dataset(dataset_name, token=token)
+            if "train" in dataset_dict:
+                return dataset_dict["train"]
+            else:
+                # If no "train" split, try to use the first available split
+                first_split = next(iter(dataset_dict))
+                return dataset_dict[first_split]
+        except Exception as e:
+            logger.error(f"Failed to load dataset {dataset_name}: {e}")
+
+        # Create the dataset
+        try:
+            # Create the repository
+            hf_api = HfApi(token=token)
+            create_repo(repo_id=dataset_name, repo_type="dataset", token=token)
+
+            # Create empty dataset with the correct schema
+            empty_dataset = Dataset.from_dict({
+                "ip": [],
+                "port": [],
+                "country": [],
+                "region": [],
+                "org": [],
+                "models": []
+            })
+
+            # Push to Hub
+            empty_dataset.push_to_hub(dataset_name, token=token)
+
+            return empty_dataset
+        except Exception as create_e:
+            logger.error(f"Failed to create dataset: {create_e}")
+            return None
+    except Exception as e:
+        logger.exception(f"Unexpected error in get_or_create_dataset: {e}")
+        return None

+def update_dataset(dataset: Dataset, new_entries: List[Dict[str, Any]]) -> Optional[Dataset]:
    """
+    Update the dataset with new entries from Shodan scan.

    Args:
        dataset: The dataset to update
+        new_entries: List of new entries to add or update in the dataset

    Returns:
+        The updated dataset or None if there was an error
    """
+    if dataset is None:
+        logger.error("Cannot update None dataset")
+        return None

+    # Convert dataset to dictionaries for easier manipulation
+    dataset_dict = dataset.to_dict()

+    # Add new entries
+    for entry in new_entries:
+        ip = entry.get("ip")
+        port = entry.get("port")

+        # Check if this IP:port combination already exists
+        found = False
+        for i, (existing_ip, existing_port) in enumerate(zip(dataset_dict["ip"], dataset_dict["port"])):
+            if existing_ip == ip and existing_port == port:
+                # Update the entry
+                dataset_dict["country"][i] = entry.get("country", dataset_dict["country"][i])
+                dataset_dict["region"][i] = entry.get("region", dataset_dict["region"][i])
+                dataset_dict["org"][i] = entry.get("org", dataset_dict["org"][i])
+                dataset_dict["models"][i] = entry.get("models", dataset_dict["models"][i])
+                found = True
                break

+        if not found:
+            # Add as a new entry
+            dataset_dict["ip"].append(entry.get("ip", ""))
+            dataset_dict["port"].append(entry.get("port", 0))
+            dataset_dict["country"].append(entry.get("country", ""))
+            dataset_dict["region"].append(entry.get("region", ""))
+            dataset_dict["org"].append(entry.get("org", ""))
+            dataset_dict["models"].append(entry.get("models", []))
+
+    # Convert back to Dataset
+    updated_dataset = Dataset.from_dict(dataset_dict)
+
+    # Push to Hub
+    token = os.environ.get("HF_TOKEN")
+    updated_dataset.push_to_hub("latterworks/llama_checker_results", token=token)
+
+    return updated_dataset

# Ollama endpoint checking
+async def check_ollama_endpoint(ip: str, port: int) -> Dict[str, Any]:
    """
+    Check a single Ollama endpoint and retrieve model information.

    Args:
+        ip: The IP address of the Ollama instance
+        port: The port of the Ollama instance

    Returns:
+        A dictionary with IP, port, models, and status information
    """
    url = f"http://{ip}:{port}/api/tags"
+    models = []
+    status = "success"

    try:
+        response = requests.get(url, timeout=5)
        response.raise_for_status()

        data = response.json()
        if "models" in data:
+            for model_data in data["models"]:
+                details = model_data.get("details", {})
                model_info = {
+                    "name": model_data.get("name", ""),
+                    "family": details.get("family", ""),
+                    "parameter_size": details.get("parameter_size", ""),
+                    "quantization_level": details.get("quantization_level", ""),
+                    "digest": model_data.get("digest", ""),
+                    "modified_at": model_data.get("modified_at", ""),
+                    "size": model_data.get("size", 0)
                }
+                models.append(model_info)
    except requests.exceptions.RequestException as e:
+        logger.error(f"Network error when checking {ip}:{port}: {e}")
+        status = "connection failed"
    except ValueError as e:
        logger.error(f"Invalid JSON from {ip}:{port}: {e}")
+        status = "invalid json"
    except Exception as e:
+        logger.exception(f"Unexpected error when checking {ip}:{port}")
+        status = "unexpected error"

+    return {
+        "ip": ip,
+        "port": port,
+        "models": models,
+        "status": status
+    }

+async def check_ollama_endpoints(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
+    Check multiple Ollama endpoints concurrently.

    Args:
+        entries: List of entries containing IP and port information

    Returns:
+        List of updated entries with model information
    """
+    tasks = []
+    for entry in entries:
+        task = asyncio.create_task(check_ollama_endpoint(entry["ip"], entry["port"]))
+        tasks.append((entry, task))

+    results = []
+    for entry, task in tasks:
+        try:
+            result = await task
+            # Merge the result with the original entry
+            # This preserves fields like country, region, and org
+            updated_entry = entry.copy()
+            updated_entry["models"] = result["models"]
+            updated_entry["status"] = result["status"]
+            results.append(updated_entry)
+        except Exception as e:
+            logger.error(f"Error checking endpoint {entry.get('ip')}:{entry.get('port')}: {e}")
+            entry["models"] = []
+            entry["status"] = "error"
+            results.append(entry)
+
+    return results

+# Shodan scanning
+def run_shodan_scan() -> List[Dict[str, Any]]:
    """
+    Run a Shodan scan for Ollama instances.

    Returns:
+        List of entries containing IP, port, and location information
    """
+    api_key = os.environ.get("SHODAN_API_KEY")
+    if not api_key:
+        logger.error("SHODAN_API_KEY environment variable not set")
+        return []
+
+    query = os.environ.get("SHODAN_QUERY", "product:Ollama port:11434")
+
+    try:
+        api = shodan.Shodan(api_key)
+        results = api.search(query, limit=1000)

+        entries = []
+        for result in results["matches"]:
+            entry = {
+                "ip": result.get("ip_str", ""),
+                "port": result.get("port", 0),
+                "country": result.get("location", {}).get("country_name", ""),
+                "region": result.get("location", {}).get("region_name", ""),
+                "org": result.get("org", ""),
+                "models": []
+            }
+            entries.append(entry)

+        return entries
+    except shodan.APIError as e:
+        logger.error(f"Shodan API error: {e}")
+        return []
+    except Exception as e:
+        logger.exception(f"Unexpected error in run_shodan_scan")
+        return []
+
+# Password validation
+def validate_admin_password(password: str) -> bool:
+    """
+    Validate the admin password.

+    Args:
+        password: The entered password to validate

+    Returns:
+        True if the password is valid, False otherwise
+    """
+    stored_password = os.environ.get("ADMIN_PASSWORD")
+    if not stored_password:
+        logger.error("ADMIN_PASSWORD environment variable not set")
+        return False
+
+    # If the stored password starts with '$2b', it's a bcrypt hash
+    if stored_password.startswith('$2b'):
+        return bcrypt.checkpw(password.encode('utf-8'), stored_password.encode('utf-8'))
+    else:
+        # Otherwise, do a direct comparison
+        return password == stored_password
+
+# Gradio UI functions
+def get_model_families_and_sizes(dataset: Dataset) -> Tuple[List[str], List[str]]:
+    """
+    Extract all unique model families and parameter sizes from the dataset.
+
+    Args:
+        dataset: The dataset to extract from

+    Returns:
+        Tuple of (families, parameter_sizes)
+    """
+    if dataset is None:
+        return [], []

+    families = set()
+    parameter_sizes = set()
+
+    for i in range(len(dataset)):
+        models = dataset[i]["models"]
+        if models:
+            for model in models:
+                family = model.get("family")
+                param_size = model.get("parameter_size")
+
+                if family:
+                    families.add(family)
+                if param_size:
+                    parameter_sizes.add(param_size)

+    return sorted(list(families)), sorted(list(parameter_sizes))
+
+def search_models(family: str, parameter_size: str, name: str, dataset: Dataset, is_admin: bool) -> Tuple[List[Dict], Dict]:
+    """
+    Search for models in the dataset based on filters.
+
+    Args:
+        family: Filter by model family
+        parameter_size: Filter by parameter size
+        name: Filter by model name
+        dataset: The dataset to search in
+        is_admin: Whether the user is an admin

+    Returns:
+        Tuple of (filtered_models, empty_details)
+    """
+    if dataset is None:
+        return [], {}
+
+    # Collect all models from the dataset
+    all_models = []
+    for i in range(len(dataset)):
+        ip = dataset[i]["ip"]
+        port = dataset[i]["port"]
+        models = dataset[i]["models"]

+        if models:
+            for model in models:
+                model_copy = model.copy()

+                # Add source info if admin
+                if is_admin:
+                    model_copy["source_ip"] = ip
+                    model_copy["source_port"] = port
+
+                # Calculate size in GB
+                if "size" in model:
+                    model_copy["size_gb"] = round(model["size"] / (1024**3), 2)
+                else:
+                    model_copy["size_gb"] = 0
+
+                all_models.append(model_copy)

+    # Apply filters
+    filtered_models = all_models
+    if family:
+        filtered_models = [m for m in filtered_models if m.get("family") == family]
+    if parameter_size:
+        filtered_models = [m for m in filtered_models if m.get("parameter_size") == parameter_size]
+    if name:
+        filtered_models = [m for m in filtered_models if name.lower() in m.get("name", "").lower()]
+
+    return filtered_models, {}
+
+def select_model(evt: gr.SelectData, models: List[Dict]) -> Dict:
+    """
+    Handle model selection from the table.
+
+    Args:
+        evt: The selection event
+        models: The list of models

+    Returns:
+        The selected model details
+    """
+    if not models or evt.index >= len(models):
+        return {}
+
+    return models[evt.index]
+
+async def scan_worker() -> str:
+    """
+    Run the complete scan workflow.
+
+    Returns:
+        Status message
+    """
+    # Run Shodan scan
+    entries = run_shodan_scan()
+    if not entries:
+        return "No Ollama instances found or scan failed"
+
+    # Check endpoints
+    updated_entries = await check_ollama_endpoints(entries)
+
+    # Update dataset
+    dataset = get_or_create_dataset()
+    if dataset is not None:
+        update_dataset(dataset, updated_entries)
+        return f"Scan completed. Found {len(entries)} Ollama instances."
+    else:
+        return "Scan completed but failed to update dataset"
+
+# Main application
+def create_app():
+    # Load the dataset
+    dataset = get_or_create_dataset()
+
+    # Get model families and parameter sizes
+    families, parameter_sizes = [], []
+    if dataset is not None:
+        families, parameter_sizes = get_model_families_and_sizes(dataset)
+
+    with gr.Blocks(title="Ollama Instance Explorer") as app:
+        # Admin login section
+        with gr.Row():
+            admin_password = gr.Textbox(
+                label="Admin Password",
+                type="password",
+                placeholder="Enter admin password"
            )
+            login_button = gr.Button("Login")
+            login_status = gr.Textbox(
+                label="Login Status",
+                value="",
+                interactive=False
            )

+        # Admin state
+        is_admin = gr.State(False)
+
+        # Admin-only section
+        with gr.Tab("Shodan Scan", visible=False) as admin_tab:
+            scan_button = gr.Button("Start Scan")
+            scan_status = gr.Textbox(
+                label="Scan Status",
                value="",
+                interactive=False
            )

+            def on_scan_click():
+                # We can't use async directly with Gradio, so use asyncio.run
+                try:
+                    return asyncio.run(scan_worker())
+                except Exception as e:
+                    logger.exception("Error during scan")
+                    return f"Error during scan: {str(e)}"

+            scan_button.click(
+                on_scan_click,
+                inputs=[],
+                outputs=[scan_status]
            )

+        # Public section
+        with gr.Tab("Browse Models"):
+            with gr.Row():
+                family_filter = gr.Dropdown(
+                    label="Family",
+                    choices=[""] + families,
+                    value=""
+                )
+                parameter_size_filter = gr.Dropdown(
+                    label="Parameter Size",
+                    choices=[""] + parameter_sizes,
+                    value=""
+                )
+                name_filter = gr.Textbox(
+                    label="Name Search",
+                    placeholder="Enter model name to search"
+                )
+
+            search_button = gr.Button("Search")

+            with gr.Row():
+                models_table = gr.DataFrame(
+                    headers=["name", "family", "parameter_size", "quantization_level", "size_gb"],
+                    datatype=["str", "str", "str", "str", "number"],
+                    interactive=False
                )
+                model_details = gr.JSON(label="Model Details")
+
+            def on_search(family, parameter_size, name, admin_status):
+                models, _ = search_models(family, parameter_size, name, dataset, admin_status)

+                # Create DataFrame-friendly format
+                df_data = []
+                for model in models:
+                    row = {
+                        "name": model.get("name", ""),
+                        "family": model.get("family", ""),
+                        "parameter_size": model.get("parameter_size", ""),
+                        "quantization_level": model.get("quantization_level", ""),
+                        "size_gb": model.get("size_gb", 0)
+                    }
+                    df_data.append(row)
+
+                return df_data, {}

+            search_button.click(
+                on_search,
+                inputs=[family_filter, parameter_size_filter, name_filter, is_admin],
+                outputs=[models_table, model_details]
            )

+            models_table.select(
+                select_model,
+                inputs=[models_table],
+                outputs=[model_details]
            )
+
+        # Handle login
+        def on_login(password):
+            if validate_admin_password(password):
+                return True, gr.update(visible=True), "Login successful"
+            else:
+                return False, gr.update(visible=False), "Invalid password"
+
+        login_button.click(
+            on_login,
+            inputs=[admin_password],
+            outputs=[is_admin, admin_tab, login_status]
+        )
+
+        # Initial search on load
+        app.load(
+            lambda: on_search("", "", "", False),
+            inputs=None,
+            outputs=[models_table, model_details]
+        )

    return app

+# Run the app
if __name__ == "__main__":
+    app = create_app()
+    app.launch()
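
Note on configuration: the new code reads SHODAN_API_KEY, SHODAN_QUERY, ADMIN_PASSWORD and (optionally) HF_TOKEN from environment variables, and validate_admin_password() accepts either a plaintext value or a bcrypt hash (any value starting with "$2b"). A minimal sketch, not part of app.py, of how such a hash could be generated offline before storing it as the ADMIN_PASSWORD secret; the password string used here is hypothetical:

# generate_admin_hash.py -- standalone helper sketch
import bcrypt

plaintext = "change-me"  # hypothetical password; replace with your own value
# bcrypt.hashpw() returns a "$2b$..." string that the app will treat as a hash
hashed = bcrypt.hashpw(plaintext.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
print(hashed)  # store this output as the ADMIN_PASSWORD secret on the Space

# Sanity check mirroring the app's comparison logic
assert bcrypt.checkpw(plaintext.encode("utf-8"), hashed.encode("utf-8"))

Storing the hash rather than the plaintext keeps the real password out of the Space settings; the app's fallback to a direct string comparison still works if a plaintext value is configured instead.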