import time
import subprocess
import pyshark
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import numpy as np
import joblib
import pandas as pd
import scapy.all as scapy
import requests
import gradio as gr
# Load the pre-trained model and feature names
model = joblib.load('extratrees.pkl')
all_features = joblib.load('featurenames.pkl')
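# Assumption (not verifiable from this script alone): 'extratrees.pkl' holds a
# fitted scikit-learn ExtraTreesClassifier trained on CIC-IDS-2017-style flow
# features, and 'featurenames.pkl' holds the ordered list of feature names that
# model expects. A minimal, non-fatal sanity check under that assumption:
if hasattr(model, "n_features_in_") and model.n_features_in_ != len(all_features):
    print("Warning: feature name list does not match the model's expected input width.")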
# Capture traffic with tshark while a headless Chrome session visits the URL (default 30 s).
def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
    try:
        # Start tshark to capture packets
        tshark_process = subprocess.Popen(
            ["tshark", "-i", "any", "-f", "tcp port 80 or tcp port 443 or port 53", "-w", capture_file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # Wait for tshark to start
        time.sleep(2)

        # Set up Chrome options
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run Chrome in headless mode
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        # Use Selenium to visit the URL
        service = Service(ChromeDriverManager().install())  # Ensure the driver is installed
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(url)

        # Capture packets for the specified duration
        time.sleep(capture_duration)

        # Close the browser
        driver.quit()

        # Stop tshark
        tshark_process.terminate()
        tshark_process.wait()

        # Read captured packets using pyshark for detailed packet information
        packets = []
        cap = pyshark.FileCapture(capture_file)
        for packet in cap:
            packets.append(str(packet))
        cap.close()

        return packets
    except Exception as e:
        print(f"Error in capturing packets: {e}")
        return None
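# Example usage (sketch, assuming tshark and Chrome are available on the host):
#   summaries = capture_packets("https://example.com", capture_duration=10)
#   -> a list of per-packet string summaries, or None if the capture failed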
# Function to extract features from captured packets
def extract_features(capture_file):
    try:
        cap = pyshark.FileCapture(capture_file)

        # Initialize features
        features = {feature: 0 for feature in all_features}
        total_packets = 0
        total_bytes = 0
        start_time = None
        end_time = None
        packet_lengths = []
        protocol_counts = {'TCP': 0, 'UDP': 0, 'ICMP': 0}
        tcp_flags = {'SYN': 0, 'ACK': 0, 'FIN': 0, 'RST': 0}

        for packet in cap:
            total_packets += 1
            total_bytes += int(packet.length)
            packet_lengths.append(int(packet.length))

            timestamp = float(packet.sniff_time.timestamp())
            if start_time is None:
                start_time = timestamp
            end_time = timestamp

            # Counting protocols and flags
            if hasattr(packet, 'tcp'):
                protocol_counts['TCP'] += 1
                # tcp.flags is exposed as a hex string (e.g. '0x0012'), so test
                # the individual flag bits rather than searching for 'SYN' etc.
                flags = int(str(packet.tcp.flags), 16)
                if flags & 0x02:
                    tcp_flags['SYN'] += 1
                if flags & 0x10:
                    tcp_flags['ACK'] += 1
                if flags & 0x01:
                    tcp_flags['FIN'] += 1
                if flags & 0x04:
                    tcp_flags['RST'] += 1
            elif hasattr(packet, 'udp'):
                protocol_counts['UDP'] += 1
            elif hasattr(packet, 'icmp'):
                protocol_counts['ICMP'] += 1

        cap.close()
        duration = end_time - start_time if start_time and end_time else 0

        # Populate extracted features
        features.update({
            "Flow Duration": duration,
            "Total Packets": total_packets,
            "Total Bytes": total_bytes,
            "Fwd Packet Length Mean": np.mean(packet_lengths) if packet_lengths else 0,
            "Bwd Packet Length Mean": 0,  # No forward/backward distinction is made here
            "Flow Bytes/s": total_bytes / duration if duration else 0,
            "Flow Packets/s": total_packets / duration if duration else 0,
            "Average Packet Size": np.mean(packet_lengths) if packet_lengths else 0,
            "Min Packet Size": min(packet_lengths) if packet_lengths else 0,
            "Max Packet Size": max(packet_lengths) if packet_lengths else 0,
            "Packet Length Variance": np.var(packet_lengths) if len(packet_lengths) > 1 else 0,
            "TCP Packets": protocol_counts['TCP'],
            "UDP Packets": protocol_counts['UDP'],
            "ICMP Packets": protocol_counts['ICMP'],
            "TCP SYN Flags": tcp_flags['SYN'],
            "TCP ACK Flags": tcp_flags['ACK'],
            "TCP FIN Flags": tcp_flags['FIN'],
            "TCP RST Flags": tcp_flags['RST']
        })
        return features
    except Exception as e:
        print(f"Error in extracting features: {e}")
        return None
# Function to compare features with CIC-IDS-2017 dataset
def compare_with_dataset(packet_features):
    # Convert the extracted features into a format that the model can use
    packet_features_series = pd.Series(packet_features)
    packet_features_series = packet_features_series.reindex(all_features, fill_value=0)

    # Predict using the loaded model
    prediction = model.predict([packet_features_series])[0]
    return "benign" if prediction == 0 else "malicious"
# Analyze the URL and predict if it's malicious
def analyze_url(url):
    try:
        # Sniff with Scapy while the request is in flight, so the captured
        # packets actually include this URL's traffic
        sniffer = scapy.AsyncSniffer()
        sniffer.start()
        requests.get(url, timeout=10)
        time.sleep(5)  # allow trailing packets to arrive
        packets = sniffer.stop()

        capture_file = 'capture.pcap'
        scapy.wrpcap(capture_file, packets)

        # Extract features from the captured packets
        packet_features = extract_features(capture_file)
        if packet_features is not None:
            prediction = compare_with_dataset(packet_features)

            # Use Pyshark to capture HTTP/HTTPS/DNS packet details
            http_dns_packets = capture_packets(url)
            captured_packets = [str(packet) for packet in packets]
            return prediction, {"scapy_packets": captured_packets, "http_dns_packets": http_dns_packets}
        else:
            return "Error in feature extraction", []
    except Exception as e:
        return str(e), []
# Define the Gradio interface
iface = gr.Interface(
    fn=analyze_url,
    inputs=gr.Textbox(label="Enter URL"),
    outputs=[gr.Textbox(label="Prediction"), gr.JSON(label="Captured Packets")],
    title="URL Malicious Activity Detection",
    description="Enter a URL to predict if it's malicious or benign by analyzing the network traffic."
)

# Launch the interface
iface.launch(debug=True)
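# Note: the capture steps above assume elevated privileges (tshark on "-i any"
# and Scapy sniffing normally require root) and that Chrome/Chromedriver can be
# installed in the runtime; in a restricted container these steps may fail and
# the helper functions will return their error values instead.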