srinuksv committed
Commit 67f87e9 · verified · 1 Parent(s): bdbc0c3

Update link4 (2).py

Files changed (1):
  1. link4 (2).py  +2 -213

link4 (2).py CHANGED
@@ -1,181 +1,3 @@
-# -*- coding: utf-8 -*-
-"""link4.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1yTE900ZWoLy3vQwKE1Y-Qbm263XCIuN7
-"""
-
-!pip install selenium
-!pip install webdriver-manager
-!pip install pyshark
-!pip install gradio
-!apt-get update
-!apt-get install -y tshark
-!tshark --version
-!pip install gradio requests scapy joblib pyshark
-
-from google.colab import drive
-drive.mount('/content/drive')
-
-import pandas as pd
-from sklearn.model_selection import train_test_split, cross_val_score
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.metrics import classification_report
-import joblib
-import subprocess
-import time
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from webdriver_manager.chrome import ChromeDriverManager
-from selenium.webdriver.chrome.options import Options
-import pyshark
-import numpy as np
-
-file_paths = [
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Morning.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Monday-WorkingHours.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Tuesday-WorkingHours.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Wednesday-workingHours.pcap_ISCX.csv'
-]
-
-# Combine all files into a single DataFrame
-df = pd.concat([pd.read_csv(file) for file in file_paths], ignore_index=True)
-
-# Strip any leading or trailing spaces from column names
-df.columns = df.columns.str.strip()
-
-# Print the first few rows and column names to verify
-print("Columns in DataFrame:")
-print(df.columns)
-
-# Check if 'Label' exists
-if 'Label' not in df.columns:
-    print("Error: 'Label' column not found in the dataset.")
-else:
-    # Proceed with mapping the labels to "benign" or "malicious"
-    label_mapping = {
-        'BENIGN': 'benign',
-        'DDoS': 'malicious',
-        'PortScan': 'malicious',
-        'Bot': 'malicious',
-        'Infiltration': 'malicious',
-        'Web Attack': 'malicious',
-        # Add other malicious classes here if necessary
-    }
-
-    # Map the labels and fill missing values with 'malicious'
-    df['Label'] = df['Label'].map(label_mapping).fillna('malicious')
-
-# Convert categorical labels to numerical
-df['Label'] = df['Label'].astype('category').cat.codes
-
-# Define features and target
-all_features = df.columns.drop('Label')
-features = df[all_features]
-target = df['Label']
-
-# Print first few rows of the processed DataFrame
-print(df.head())
-print(df.columns)
-print(f"Features columns: {features.columns}")
-print(f"Target unique values: {target.unique()}")
-
-from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler
-print(f"Missing values in features:\n{features.isnull().sum()}")
-print(f"Missing values in target:\n{target.isnull().sum()}")
-print(f"Infinites in features:\n{np.isinf(features).sum()}")
-
-# Replace infinite values with NaN
-features.replace([np.inf, -np.inf], np.nan, inplace=True)
-
-# Handle missing values: Impute with the mean (for numerical features)
-imputer = SimpleImputer(strategy='mean')
-features_imputed = imputer.fit_transform(features)
-
-# Normalize features to handle large values
-scaler = StandardScaler()
-features_scaled = scaler.fit_transform(features_imputed)
-
-# Split data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(features_imputed, target, test_size=0.3, random_state=42, stratify=target)
-
-# Initialize and train the Extra Trees model
-model = ExtraTreesClassifier(n_estimators=100, random_state=42)
-model.fit(X_train, y_train)
-
-y_pred = model.predict(X_test)
-print(classification_report(y_test, y_pred))
-from sklearn.metrics import accuracy_score, classification_report
-train_predictions = model.predict(X_train)
-test_predictions = model.predict(X_test)
-
-train_accuracy = accuracy_score(y_train, train_predictions)
-test_accuracy = accuracy_score(y_test, test_predictions)
-
-print(f"Training Accuracy: {train_accuracy:.4f}")
-print(f"Testing Accuracy: {test_accuracy:.4f}")
-
-print("Classification Report (Test Data):")
-print(classification_report(y_test, test_predictions))
-
-# Save the model and feature names
-joblib.dump(model, 'extratrees.pkl')
-joblib.dump(all_features.tolist(), 'featurenames.pkl')
-import joblib
-
-# Load the model and feature names
-loaded_model = joblib.load('extratrees.pkl')
-loaded_features = joblib.load('featurenames.pkl')
-
-# Check if they are loaded successfully
-print(f"Model Loaded: {loaded_model is not None}")
-print(f"Features Loaded: {loaded_features is not None}")
-
-# prompt: print different styles and new styles for the classification report\
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import classification_report
-def plot_classification_report_styled(y_true, y_pred):
-    report = classification_report(y_true, y_pred, output_dict=True)
-    df_report = pd.DataFrame(report).transpose()
-
-    # Style the DataFrame with different colors and formatting
-    styled_report = df_report.style.background_gradient(cmap='viridis', axis=None) \
-        .highlight_max(color='lightgreen', axis=0) \
-        .highlight_min(color='lightcoral', axis=0) \
-        .format('{:.2f}')
-
-    # Display the styled report
-    display(styled_report)
-
-
-# Use the new function to display a styled classification report
-plot_classification_report_styled(y_test, y_pred)
-
-
-# Alternative Styling using Seaborn and Matplotlib with customization
-
-def plot_classification_report_seaborn_styled(y_true, y_pred):
-    report = classification_report(y_true, y_pred, output_dict=True)
-    df_report = pd.DataFrame(report).transpose()
-    plt.figure(figsize=(10, 6))
-    sns.heatmap(df_report[['precision', 'recall', 'f1-score']], annot=True, fmt=".2f", cmap="YlGnBu", linewidths=.5, annot_kws={"size": 12})
-    plt.title("Classification Report Heatmap", fontsize=16)
-    plt.xlabel("Metrics", fontsize=14)
-    plt.ylabel("Classes", fontsize=14)
-    plt.xticks(fontsize=12)
-    plt.yticks(fontsize=12)
-    plt.show()
-
-plot_classification_report_seaborn_styled(y_test, y_pred)
-
 import time
 import subprocess
 import pyshark
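
Note on the removed training block: it fits a StandardScaler but then splits and trains on the unscaled features_imputed, and only the classifier and the feature-name list are pickled for the serving script that remains. A minimal sketch of an alternative packaging, assuming the imputation and scaling were meant to be reproduced at prediction time (hypothetical, not part of this commit; the toy DataFrame and the file name extratrees_pipeline.pkl are illustrative only):

import numpy as np
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesClassifier

# Toy stand-in for the CICIDS2017 feature matrix; the real notebook builds
# `features` and `target` from the eight CSVs listed above.
features = pd.DataFrame({
    'Flow Duration': [1.0, np.nan, 3.0, 4.0, 5.0, 6.0],
    'Total Fwd Packets': [10, 20, np.inf, 40, 50, 60],
})
features = features.replace([np.inf, -np.inf], np.nan)
target = pd.Series([0, 1, 0, 1, 0, 1])

# Bundle imputation, scaling, and the classifier into one artifact so the
# serving side needs a single joblib.load() and cannot skip a preprocessing step.
pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', StandardScaler()),
    ('clf', ExtraTreesClassifier(n_estimators=100, random_state=42)),
])
pipeline.fit(features, target)
joblib.dump(pipeline, 'extratrees_pipeline.pkl')
print(pipeline.predict(features[:2]))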
@@ -191,8 +13,8 @@ import requests
 import gradio as gr
 
 # Load the pre-trained model and feature names
-model = joblib.load('/content/drive/MyDrive/Colab Notebooks/link1/extratrees.pkl')
-all_features = joblib.load('/content/drive/MyDrive/Colab Notebooks/link1/featurenames.pkl')
+model = joblib.load('extratrees.pkl')
+all_features = joblib.load('featurenames.pkl')
 
 # Modify the capture duration to a longer period
 def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
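
The two added lines assume extratrees.pkl and featurenames.pkl sit in the process's current working directory rather than on a mounted Drive. A small defensive variant of that loading step, sketched under the same assumption (the load_artifact helper is hypothetical), resolves the files relative to the script and fails with an explicit message when an artifact was not uploaded:

from pathlib import Path
import joblib

# Resolve the pickled artifacts next to this script rather than relying on
# whatever directory the process happens to be launched from.
BASE_DIR = Path(__file__).resolve().parent

def load_artifact(filename):
    path = BASE_DIR / filename
    if not path.is_file():
        raise FileNotFoundError(f"{filename} is missing; upload it next to the script")
    return joblib.load(path)

model = load_artifact('extratrees.pkl')
all_features = load_artifact('featurenames.pkl')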
@@ -352,36 +174,3 @@ iface = gr.Interface(
 # Launch the interface
 iface.launch(debug=True)
 
-import matplotlib.pyplot as plt
-import numpy as np
-
-# Sample data extracted from captured packets
-# These would come from the extracted packet features
-tcp_counts = 20  # Number of TCP packets
-udp_counts = 10  # Number of UDP packets
-packet_sizes = [60, 150, 300, 450, 500, 700, 900, 1100, 1400, 1600]  # Example packet sizes in bytes
-timestamps = np.linspace(0, 30, len(packet_sizes))  # Sample timestamps over 30 seconds
-
-# Create a new figure
-plt.figure(figsize=(10, 6))
-
-# Plot TCP and UDP packet counts in a bar chart
-plt.subplot(2, 1, 1)  # 2 rows, 1 column, first plot
-plt.bar(['TCP', 'UDP'], [tcp_counts, udp_counts], color=['blue', 'orange'])
-plt.title('TCP vs UDP Packet Counts')
-plt.xlabel('Protocol')
-plt.ylabel('Packet Count')
-
-# Plot packet sizes over time
-plt.subplot(2, 1, 2)  # 2 rows, 1 column, second plot
-plt.plot(timestamps, packet_sizes, marker='o', color='green')
-plt.title('Packet Sizes over Time')
-plt.xlabel('Time (s)')
-plt.ylabel('Packet Size (bytes)')
-
-# Adjust layout to prevent overlap
-plt.tight_layout()
-
-# Display the plots
-plt.show()
-
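
The deleted plotting cell charted hard-coded sample values (tcp_counts = 20, udp_counts = 10, example packet sizes) rather than anything measured from the capture. A sketch of producing the same two charts from an actual capture with pyshark follows, assuming a capture.pcap written by capture_packets() exists and tshark is installed; the packet attributes used are pyshark's standard transport_layer, sniff_timestamp, and length fields:

import matplotlib.pyplot as plt
import pyshark

# Tally protocol counts, packet sizes, and relative timestamps from the capture
# written by capture_packets(). Assumes capture.pcap is present.
cap = pyshark.FileCapture('capture.pcap')
tcp_count, udp_count = 0, 0
packet_sizes, timestamps = [], []
start = None
for pkt in cap:
    if pkt.transport_layer == 'TCP':
        tcp_count += 1
    elif pkt.transport_layer == 'UDP':
        udp_count += 1
    ts = float(pkt.sniff_timestamp)
    start = ts if start is None else start
    timestamps.append(ts - start)
    packet_sizes.append(int(pkt.length))
cap.close()

plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
plt.bar(['TCP', 'UDP'], [tcp_count, udp_count], color=['blue', 'orange'])
plt.title('TCP vs UDP Packet Counts')
plt.ylabel('Packet Count')
plt.subplot(2, 1, 2)
plt.plot(timestamps, packet_sizes, marker='o', color='green')
plt.title('Packet Sizes over Time')
plt.xlabel('Time (s)')
plt.ylabel('Packet Size (bytes)')
plt.tight_layout()
plt.show()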