Spaces:

YashMK89
/

Security_Tools

Running

App Files Files Community

YashMK89 committed on Jun 13

Commit

a4f5cb7

verified ·

1 Parent(s): a41521c

upload malicious url python code

Browse files

Files changed (1) hide show

pages/malicious_url.py +98 -0

pages/malicious_url.py ADDED Viewed

	@@ -0,0 +1,98 @@

+# pages/malicious_url.py
+import streamlit as st
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from urllib.parse import urlparse
+import re
+import joblib
+@st.cache_resource
+def load_model_and_scaler():
+    model = tf.keras.models.load_model("models/malicious_url_model.h5")
+    scaler = joblib.load("models/scaler.pkl")
+    return model, scaler
+# Module-level handles used by app() for scaling and prediction. This runs
+# when the module is executed, so a missing model or scaler file fails here
+# rather than when the user clicks the analyze button.
+model, scaler = load_model_and_scaler()
+def extract_features(url):
+    try:
+        parsed_url = urlparse(str(url))
+        features = {
+            'url_length': len(str(url)),
+            'hostname_length': len(parsed_url.hostname) if parsed_url.hostname else 0,
+            'path_length': len(parsed_url.path) if parsed_url.path else 0,
+            'query_length': len(parsed_url.query) if parsed_url.query else 0,
+            'fragment_length': len(parsed_url.fragment) if parsed_url.fragment else 0,
+            'num_dots': str(url).count('.'),
+            'num_hyphens': str(url).count('-'),
+            'num_at': str(url).count('@'),
+            'num_question': str(url).count('?'),
+            'num_ampersand': str(url).count('&'),
+            'num_equals': str(url).count('='),
+            'num_exclamation': str(url).count('!'),
+            'num_slash': str(url).count('/'),
+            'num_plus': str(url).count('+'),
+            'num_asterisk': str(url).count('*'),
+            'num_underscore': str(url).count('_'),
+            'num_hash': str(url).count('#'),
+            'num_dollar': str(url).count('$'),
+            'num_percent': str(url).count('%'),
+            'is_https': 1 if parsed_url.scheme == 'https' else 0,
+            'has_http_in_hostname': 1 if parsed_url.hostname and 'http' in parsed_url.hostname else 0,
+            'hostname_is_ip': 1 if parsed_url.hostname and re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', parsed_url.hostname) else 0,
+            'path_depth': str(url).count('/') - 2 if url and urlparse(str(url)).scheme in ['http', 'https'] and str(url).count('/') > 2 else 0
+        }
+        return pd.Series(features)
+    except Exception:
+        return pd.Series({
+            'url_length': 0, 'hostname_length': 0, 'path_length': 0,
+            'query_length': 0, 'fragment_length': 0, 'num_dots': 0,
+            'num_hyphens': 0, 'num_at': 0, 'num_question': 0,
+            'num_ampersand': 0, 'num_equals': 0, 'num_exclamation': 0,
+            'num_slash': 0, 'num_plus': 0, 'num_asterisk': 0,
+            'num_underscore': 0, 'num_hash': 0, 'num_dollar': 0,
+            'num_percent': 0, 'is_https': 0, 'has_http_in_hostname': 0,
+            'hostname_is_ip': 0, 'path_depth': 0
+        })
+X_columns = [
+    'url_length', 'hostname_length', 'path_length', 'query_length',
+    'fragment_length', 'num_dots', 'num_hyphens', 'num_at',
+    'num_question', 'num_ampersand', 'num_equals', 'num_exclamation',
+    'num_slash', 'num_plus', 'num_asterisk', 'num_underscore',
+    'num_hash', 'num_dollar', 'num_percent', 'is_https',
+    'has_http_in_hostname', 'hostname_is_ip', 'path_depth'
+]
+def app():
+    st.title("🔗 Malicious URL Detector")
+    st.markdown("Enter a URL below to check if it's likely malicious.")
+    url_input = st.text_input(
+        "🔗 Enter a URL:",
+        placeholder="e.g., https://example.com",
+        help="Type any URL you want to analyze"
+    )
+    if st.button("🔍 Analyze URL"):
+        if not url_input.strip():
+            st.warning("Please enter a valid URL.")
+        else:
+            with st.spinner("Analyzing..."):
+                features = extract_features(url_input)
+                df_new = pd.DataFrame([features])
+                X_new = df_new[X_columns]
+                X_new.fillna(-1, inplace=True)
+                X_scaled = scaler.transform(X_new)
+                prediction = model.predict(X_scaled)
+                prob = float(prediction[0][0])
+                if prob > 0.5:
+                    st.error(f"⚠️ This URL is likely **malicious**. Confidence: `{prob:.4f}`")
+                else:
+                    st.success(f"✅ This URL appears to be **safe**. Confidence: `{1 - prob:.4f}`")
+    st.markdown("---")
+    st.markdown("💡 *Model trained on URL-based features like length, special characters, domain patterns, etc.*")