Create app.py
app.py
ADDED
@@ -0,0 +1,175 @@
import time
import subprocess
import pyshark
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import numpy as np
import joblib
import pandas as pd
import scapy.all as scapy
import requests
import gradio as gr

# Load the pre-trained model and feature names
model = joblib.load('extratrees.pkl')
all_features = joblib.load('featurenames.pkl')

# Capture packets with tshark while a headless browser visits the URL
def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
    try:
        # Start tshark to capture HTTP/HTTPS/DNS traffic
        tshark_process = subprocess.Popen(
            ["tshark", "-i", "any", "-f", "tcp port 80 or tcp port 443 or port 53", "-w", capture_file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # Wait for tshark to start
        time.sleep(2)

        # Set up Chrome options
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run Chrome in headless mode
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        # Use Selenium to visit the URL
        service = Service(ChromeDriverManager().install())  # Ensure the driver is installed
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(url)

        # Capture packets for the specified duration
        time.sleep(capture_duration)

        # Close the browser
        driver.quit()

        # Stop tshark
        tshark_process.terminate()
        tshark_process.wait()

        # Read captured packets using pyshark for detailed packet information
        packets = []
        cap = pyshark.FileCapture(capture_file)
        for packet in cap:
            packets.append(str(packet))
        cap.close()
        return packets
    except Exception as e:
        print(f"Error in capturing packets: {e}")
        return None

# Function to extract features from captured packets
def extract_features(capture_file):
    try:
        cap = pyshark.FileCapture(capture_file)

        # Initialize features
        features = {feature: 0 for feature in all_features}
        total_packets = 0
        total_bytes = 0
        start_time = None
        end_time = None
        packet_lengths = []
        protocol_counts = {'TCP': 0, 'UDP': 0, 'ICMP': 0}
        tcp_flags = {'SYN': 0, 'ACK': 0, 'FIN': 0, 'RST': 0}

        for packet in cap:
            total_packets += 1
            total_bytes += int(packet.length)
            packet_lengths.append(int(packet.length))
            timestamp = float(packet.sniff_time.timestamp())

            if start_time is None:
                start_time = timestamp
            end_time = timestamp  # Keep updating so end_time is the last packet's time

            # Count protocols and TCP flags. pyshark exposes tcp.flags as a
            # hex string (e.g. '0x0012'), so test the individual flag bits
            # rather than searching for flag names in the string.
            if hasattr(packet, 'tcp'):
                protocol_counts['TCP'] += 1
                flags = int(packet.tcp.flags, 16)
                if flags & 0x02:
                    tcp_flags['SYN'] += 1
                if flags & 0x10:
                    tcp_flags['ACK'] += 1
                if flags & 0x01:
                    tcp_flags['FIN'] += 1
                if flags & 0x04:
                    tcp_flags['RST'] += 1
            elif hasattr(packet, 'udp'):
                protocol_counts['UDP'] += 1
            elif hasattr(packet, 'icmp'):
                protocol_counts['ICMP'] += 1
        cap.close()

        duration = end_time - start_time if start_time is not None and end_time is not None else 0

        # Populate extracted features
        features.update({
            "Flow Duration": duration,
            "Total Packets": total_packets,
            "Total Bytes": total_bytes,
            "Fwd Packet Length Mean": np.mean(packet_lengths) if packet_lengths else 0,
            "Bwd Packet Length Mean": 0,  # No forward/backward distinction is made here
            "Flow Bytes/s": total_bytes / duration if duration else 0,
            "Flow Packets/s": total_packets / duration if duration else 0,
            "Average Packet Size": np.mean(packet_lengths) if packet_lengths else 0,
            "Min Packet Size": min(packet_lengths) if packet_lengths else 0,
            "Max Packet Size": max(packet_lengths) if packet_lengths else 0,
            "Packet Length Variance": np.var(packet_lengths) if len(packet_lengths) > 1 else 0,
            "TCP Packets": protocol_counts['TCP'],
            "UDP Packets": protocol_counts['UDP'],
            "ICMP Packets": protocol_counts['ICMP'],
            "TCP SYN Flags": tcp_flags['SYN'],
            "TCP ACK Flags": tcp_flags['ACK'],
            "TCP FIN Flags": tcp_flags['FIN'],
            "TCP RST Flags": tcp_flags['RST']
        })

        return features
    except Exception as e:
        print(f"Error in extracting features: {e}")
        return None

# Compare extracted features against the model trained on CIC-IDS-2017
def compare_with_dataset(packet_features):
    # Reorder the extracted features into the column order the model expects
    packet_features_series = pd.Series(packet_features)
    packet_features_series = packet_features_series.reindex(all_features, fill_value=0)
    # Predict using the loaded model; wrapping the row in a DataFrame keeps
    # feature names aligned with those seen at training time
    prediction = model.predict(pd.DataFrame([packet_features_series]))[0]
    return "benign" if prediction == 0 else "malicious"

# Analyze the URL and predict if it's malicious
def analyze_url(url):
    try:
        # Sniff with Scapy in the background while the request is in flight,
        # so the request's own traffic is actually captured, then stop after
        # a short window instead of blocking on a fixed packet count
        sniffer = scapy.AsyncSniffer()
        sniffer.start()
        requests.get(url, timeout=10)
        time.sleep(5)  # Give responses time to arrive
        packets = sniffer.stop()

        capture_file = 'capture.pcap'
        scapy.wrpcap(capture_file, packets)

        # Extract features from the captured packets
        packet_features = extract_features(capture_file)
        if packet_features is not None:
            prediction = compare_with_dataset(packet_features)

            # Use pyshark to capture HTTP/HTTPS/DNS packet details
            http_dns_packets = capture_packets(url)

            captured_packets = [str(packet) for packet in packets]
            return prediction, {"scapy_packets": captured_packets, "http_dns_packets": http_dns_packets}
        else:
            return "Error in feature extraction", []
    except Exception as e:
        return str(e), []

# Define the Gradio interface
iface = gr.Interface(
    fn=analyze_url,
    inputs=gr.Textbox(label="Enter URL"),
    outputs=[gr.Textbox(label="Prediction"), gr.JSON(label="Captured Packets")],
    title="URL Malicious Activity Detection",
    description="Enter a URL to predict if it's malicious or benign by analyzing the network traffic."
)

# Launch the interface
iface.launch(debug=True)
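
A note on deployment: besides app.py, this Space needs the pickled model files ('extratrees.pkl' and 'featurenames.pkl') uploaded to the repo, the Python dependencies listed in requirements.txt, and the tshark binary installed at the system level (Hugging Face Spaces installs apt packages listed in packages.txt). Below is a minimal sketch of those two files; the exact package set is an assumption inferred from the imports above, not taken from the original repo.

requirements.txt
    pyshark
    selenium
    webdriver-manager
    numpy
    joblib
    pandas
    scapy
    requests
    gradio
    scikit-learn

packages.txt
    tshark
    chromium
    chromium-driver

scikit-learn is included because joblib needs it available to unpickle the ExtraTrees model; chromium and chromium-driver give Selenium a browser to drive inside the container. Note also that capturing on interface "any" typically requires elevated capture privileges, which a standard Space container may not grant.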