srinuksv committed
Commit a9280ab · verified · 1 Parent(s): 2b1d13b

Create app.py

Files changed (1)
  1. app.py +175 -0
app.py ADDED
@@ -0,0 +1,175 @@
+ import time
+ import subprocess
+ import pyshark
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from webdriver_manager.chrome import ChromeDriverManager
+ from selenium.webdriver.chrome.options import Options
+ import numpy as np
+ import joblib
+ import pandas as pd
+ import scapy.all as scapy
+ import requests
+ import gradio as gr
+
+ # Load the pre-trained model and feature names
+ model = joblib.load('extratrees.pkl')
+ all_features = joblib.load('featurenames.pkl')
+
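+ # Note: this assumes extratrees.pkl (the trained ExtraTrees classifier) and
+ # featurenames.pkl (the list of training feature names) sit next to app.py;
+ # joblib.load raises FileNotFoundError if either artifact is missing.
+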
+ # Capture traffic with tshark while a headless browser visits the URL
+ def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
+     try:
+         # Start tshark to capture HTTP/HTTPS/DNS packets
+         tshark_process = subprocess.Popen(
+             ["tshark", "-i", "any", "-f", "tcp port 80 or tcp port 443 or port 53", "-w", capture_file],
+             stdout=subprocess.PIPE, stderr=subprocess.PIPE
+         )
+         # Give tshark a moment to start
+         time.sleep(2)
+
+         # Set up Chrome options
+         chrome_options = Options()
+         chrome_options.add_argument("--headless")  # Run Chrome in headless mode
+         chrome_options.add_argument("--no-sandbox")
+         chrome_options.add_argument("--disable-dev-shm-usage")
+
+         # Use Selenium to visit the URL
+         service = Service(ChromeDriverManager().install())  # Ensure the driver is installed
+         driver = webdriver.Chrome(service=service, options=chrome_options)
+         driver.get(url)
+
+         # Let traffic flow for the specified duration
+         time.sleep(capture_duration)
+
+         # Close the browser
+         driver.quit()
+
+         # Stop tshark
+         tshark_process.terminate()
+         tshark_process.wait()
+
+         # Read the captured packets back with pyshark for detailed packet information
+         packets = []
+         cap = pyshark.FileCapture(capture_file)
+         for packet in cap:
+             packets.append(str(packet))
+         cap.close()
+         return packets
+     except Exception as e:
+         print(f"Error in capturing packets: {e}")
+         return None
+
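+ # Note: this step assumes tshark (from Wireshark) and Chrome are installed on
+ # the host, and that capturing on the "any" interface is permitted; on Linux
+ # that normally requires elevated privileges (root or CAP_NET_RAW).
+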
+ # Extract flow-level features from the captured packets
+ def extract_features(capture_file):
+     try:
+         cap = pyshark.FileCapture(capture_file)
+
+         # Initialize every model feature to 0 so missing columns stay defined
+         features = {feature: 0 for feature in all_features}
+         total_packets = 0
+         total_bytes = 0
+         start_time = None
+         end_time = None
+         packet_lengths = []
+         protocol_counts = {'TCP': 0, 'UDP': 0, 'ICMP': 0}
+         tcp_flags = {'SYN': 0, 'ACK': 0, 'FIN': 0, 'RST': 0}
+
+         for packet in cap:
+             total_packets += 1
+             total_bytes += int(packet.length)
+             packet_lengths.append(int(packet.length))
+             timestamp = float(packet.sniff_time.timestamp())
+
+             if start_time is None:
+                 start_time = timestamp
+             end_time = timestamp
+
+             # Count protocols and TCP flags; tcp.flags is a hex string such as
+             # "0x0012", so test the flag bits rather than substrings
+             if hasattr(packet, 'tcp'):
+                 protocol_counts['TCP'] += 1
+                 flags = int(packet.tcp.flags, 16)
+                 if flags & 0x02:
+                     tcp_flags['SYN'] += 1
+                 if flags & 0x10:
+                     tcp_flags['ACK'] += 1
+                 if flags & 0x01:
+                     tcp_flags['FIN'] += 1
+                 if flags & 0x04:
+                     tcp_flags['RST'] += 1
+             elif hasattr(packet, 'udp'):
+                 protocol_counts['UDP'] += 1
+             elif hasattr(packet, 'icmp'):
+                 protocol_counts['ICMP'] += 1
+
+         cap.close()
+         duration = end_time - start_time if start_time and end_time else 0
+
+         # Populate the extracted features
+         features.update({
+             "Flow Duration": duration,
+             "Total Packets": total_packets,
+             "Total Bytes": total_bytes,
+             "Fwd Packet Length Mean": np.mean(packet_lengths) if packet_lengths else 0,
+             "Bwd Packet Length Mean": 0,  # no direction information is recovered here
+             "Flow Bytes/s": total_bytes / duration if duration else 0,
+             "Flow Packets/s": total_packets / duration if duration else 0,
+             "Average Packet Size": np.mean(packet_lengths) if packet_lengths else 0,
+             "Min Packet Size": min(packet_lengths) if packet_lengths else 0,
+             "Max Packet Size": max(packet_lengths) if packet_lengths else 0,
+             "Packet Length Variance": np.var(packet_lengths) if len(packet_lengths) > 1 else 0,
+             "TCP Packets": protocol_counts['TCP'],
+             "UDP Packets": protocol_counts['UDP'],
+             "ICMP Packets": protocol_counts['ICMP'],
+             "TCP SYN Flags": tcp_flags['SYN'],
+             "TCP ACK Flags": tcp_flags['ACK'],
+             "TCP FIN Flags": tcp_flags['FIN'],
+             "TCP RST Flags": tcp_flags['RST']
+         })
+
+         return features
+     except Exception as e:
+         print(f"Error in extracting features: {e}")
+         return None
+
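+ # Note: only the handful of features computed above are filled in; every other
+ # column from featurenames.pkl keeps its 0 default, which can skew predictions
+ # if the model was trained on the full CIC-IDS-2017 feature set.
+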
+ # Classify the extracted features with the model trained on CIC-IDS-2017
+ def compare_with_dataset(packet_features):
+     # Align the extracted features with the columns the model expects
+     packet_features_series = pd.Series(packet_features)
+     packet_features_series = packet_features_series.reindex(all_features, fill_value=0)
+     # Predict with the loaded model; a single-row DataFrame preserves feature names
+     prediction = model.predict(pd.DataFrame([packet_features_series]))[0]
+     return "benign" if prediction == 0 else "malicious"
+
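+ # Illustrative call, assuming extract_features returned a feature dict
+ # (any feature not supplied is filled with 0 by the reindex above):
+ #   verdict = compare_with_dataset({"Total Packets": 42})  # "benign" or "malicious"
+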
+ # Analyze the URL and predict whether it is malicious
+ def analyze_url(url):
+     try:
+         # Start the Scapy sniffer before issuing the request so the request's
+         # own traffic is captured; a blocking sniff started afterwards could
+         # miss it or wait indefinitely for unrelated packets
+         sniffer = scapy.AsyncSniffer()
+         sniffer.start()
+         response = requests.get(url, timeout=10)
+         time.sleep(2)  # let trailing packets arrive
+         packets = sniffer.stop()
+         capture_file = 'capture.pcap'
+         scapy.wrpcap(capture_file, packets)
+
+         # Extract features from the captured packets
+         packet_features = extract_features(capture_file)
+         if packet_features is not None:
+             prediction = compare_with_dataset(packet_features)
+
+             # Use pyshark (via capture_packets) for HTTP/HTTPS/DNS packet details
+             http_dns_packets = capture_packets(url)
+
+             captured_packets = [str(packet) for packet in packets]
+             return prediction, {"scapy_packets": captured_packets, "http_dns_packets": http_dns_packets}
+         else:
+             return "Error in feature extraction", []
+     except Exception as e:
+         return str(e), []
+
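+ # Note: Scapy sniffing also needs raw-socket privileges (typically root); in a
+ # restricted deployment the capture steps above are expected to fail, in which
+ # case analyze_url returns the exception text instead of a prediction.
+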
+ # Define the Gradio interface
+ iface = gr.Interface(
+     fn=analyze_url,
+     inputs=gr.Textbox(label="Enter URL"),
+     outputs=[gr.Textbox(label="Prediction"), gr.JSON(label="Captured Packets")],
+     title="URL Malicious Activity Detection",
+     description="Enter a URL to predict if it's malicious or benign by analyzing the network traffic."
+ )
+
+ # Launch the interface
+ iface.launch(debug=True)