YashMK89 committed on
Commit
a4f5cb7
·
verified ·
1 Parent(s): a41521c

upload malicious url python code

Browse files
Files changed (1) hide show
  1. pages/malicious_url.py +98 -0
pages/malicious_url.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pages/malicious_url.py
2
+
3
+ import streamlit as st
4
+ import numpy as np
5
+ import pandas as pd
6
+ import tensorflow as tf
7
+ from urllib.parse import urlparse
8
+ import re
9
+ import joblib
10
+
11
@st.cache_resource
def load_model_and_scaler():
    """Load the trained Keras model and the fitted feature scaler from disk.

    The function is wrapped in ``st.cache_resource`` so the artifacts are
    loaded once and reused instead of being re-read on every call.

    Returns:
        A ``(model, scaler)`` tuple: the object returned by
        ``tf.keras.models.load_model`` and the object stored in
        ``models/scaler.pkl``.
    """
    model = tf.keras.models.load_model("models/malicious_url_model.h5")
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code. Only ship scaler artifacts that come from a trusted source.
    scaler = joblib.load("models/scaler.pkl")
    return model, scaler


# Loaded at import time so that app() can use both objects as module globals.
model, scaler = load_model_and_scaler()
18
+
19
# Length-style features derived from the parsed URL components.
_LENGTH_FEATURES = (
    'url_length', 'hostname_length', 'path_length',
    'query_length', 'fragment_length',
)

# (feature name, character) pairs; each feature is the number of times the
# character occurs anywhere in the raw URL string.
_CHAR_COUNT_FEATURES = (
    ('num_dots', '.'),
    ('num_hyphens', '-'),
    ('num_at', '@'),
    ('num_question', '?'),
    ('num_ampersand', '&'),
    ('num_equals', '='),
    ('num_exclamation', '!'),
    ('num_slash', '/'),
    ('num_plus', '+'),
    ('num_asterisk', '*'),
    ('num_underscore', '_'),
    ('num_hash', '#'),
    ('num_dollar', '$'),
    ('num_percent', '%'),
)

# Binary / structural features appended after the character counts.
_FLAG_FEATURES = (
    'is_https', 'has_http_in_hostname', 'hostname_is_ip', 'path_depth',
)

# Full feature order; also used to build the all-zero fallback so that the
# fallback can never drift out of sync with the real feature set.
_FEATURE_NAMES = (
    *_LENGTH_FEATURES,
    *(name for name, _ in _CHAR_COUNT_FEATURES),
    *_FLAG_FEATURES,
)

# Dotted-quad IPv4 shape check (does not validate that each octet is <= 255).
_IPV4_HOSTNAME_RE = re.compile(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$')


def extract_features(url):
    """Turn a URL into the numeric feature vector used by the classifier.

    Args:
        url: The URL to analyse. Any object is accepted; it is converted
            with ``str()`` before parsing.

    Returns:
        A ``pandas.Series`` with 23 integer features, indexed by feature
        name: component lengths, per-character counts over the whole URL
        string, and the ``is_https`` / ``has_http_in_hostname`` /
        ``hostname_is_ip`` / ``path_depth`` indicators. If anything goes
        wrong while parsing (for example ``urlparse`` raising ``ValueError``
        on a malformed IPv6 literal), a Series of all zeros with the same
        index is returned instead of raising.
    """
    try:
        text = str(url)
        parsed = urlparse(text)
        # hostname is None when the URL has no network location.
        hostname = parsed.hostname or ''
        slash_count = text.count('/')

        features = {
            'url_length': len(text),
            'hostname_length': len(hostname),
            'path_length': len(parsed.path),
            'query_length': len(parsed.query),
            'fragment_length': len(parsed.fragment),
        }
        features.update(
            (name, text.count(char)) for name, char in _CHAR_COUNT_FEATURES
        )
        features['is_https'] = 1 if parsed.scheme == 'https' else 0
        features['has_http_in_hostname'] = 1 if 'http' in hostname else 0
        features['hostname_is_ip'] = (
            1 if _IPV4_HOSTNAME_RE.match(hostname) else 0
        )
        # Depth is "all slashes minus the two in '://'", only for http(s)
        # URLs. Slashes in the query/fragment are counted too; this mirrors
        # the feature definition the model was built with, so keep it as is.
        has_web_scheme = parsed.scheme in ('http', 'https')
        features['path_depth'] = (
            slash_count - 2
            if url and has_web_scheme and slash_count > 2
            else 0
        )
        return pd.Series(features)
    except Exception:
        # Deliberate best-effort: an unparseable URL yields a neutral
        # all-zero vector rather than crashing the caller.
        return pd.Series(dict.fromkeys(_FEATURE_NAMES, 0))
59
+
60
# Feature columns, in the order they are selected from the feature frame
# before scaling. Presumably this is the order the scaler and model were
# fitted with -- confirm against the training code before reordering.
# Must stay a list: pandas treats a list indexer as "select these columns".
X_columns = [
    'url_length', 'hostname_length', 'path_length', 'query_length',
    'fragment_length', 'num_dots', 'num_hyphens', 'num_at',
    'num_question', 'num_ampersand', 'num_equals', 'num_exclamation',
    'num_slash', 'num_plus', 'num_asterisk', 'num_underscore',
    'num_hash', 'num_dollar', 'num_percent', 'is_https',
    'has_http_in_hostname', 'hostname_is_ip', 'path_depth',
]
68
+
69
def app():
    """Render the Malicious URL Detector page.

    Shows a text box and an "Analyze" button. When the button is pressed
    with a non-blank URL, the URL is converted to features with
    ``extract_features``, scaled with the module-level ``scaler``, scored by
    the module-level ``model``, and the verdict is displayed. A score above
    0.5 is reported as malicious; otherwise the URL is reported as safe.
    """
    st.title("🔗 Malicious URL Detector")
    st.markdown("Enter a URL below to check if it's likely malicious.")

    url_input = st.text_input(
        "🔗 Enter a URL:",
        placeholder="e.g., https://example.com",
        help="Type any URL you want to analyze"
    )

    if st.button("🔍 Analyze URL"):
        # Analyse the same stripped text that the emptiness check looks at,
        # so pasted leading/trailing whitespace does not skew the length
        # features.
        url = url_input.strip()
        if not url:
            st.warning("Please enter a valid URL.")
        else:
            with st.spinner("Analyzing..."):
                features = extract_features(url)
                df_new = pd.DataFrame([features])
                # Non-inplace fillna: avoids mutating a derived selection of
                # df_new (SettingWithCopy / copy-on-write hazard).
                X_new = df_new[X_columns].fillna(-1)
                X_scaled = scaler.transform(X_new)
                prediction = model.predict(X_scaled)
                # First output of the first (only) row is used as the score.
                prob = float(prediction[0][0])

            if prob > 0.5:
                st.error(f"⚠️ This URL is likely **malicious**. Confidence: `{prob:.4f}`")
            else:
                st.success(f"✅ This URL appears to be **safe**. Confidence: `{1 - prob:.4f}`")

    st.markdown("---")
    st.markdown("💡 *Model trained on URL-based features like length, special characters, domain patterns, etc.*")