srinuksv committed
Commit 67f87e9 · verified · 1 Parent(s): bdbc0c3

Update link4 (2).py

Files changed (1):
  1. link4 (2).py  +2 -213

link4 (2).py CHANGED
@@ -1,181 +1,3 @@
-# -*- coding: utf-8 -*-
-"""link4.ipynb
-
-Automatically generated by Colab.
-
-Original file is located at
-    https://colab.research.google.com/drive/1yTE900ZWoLy3vQwKE1Y-Qbm263XCIuN7
-"""
-
-!pip install selenium
-!pip install webdriver-manager
-!pip install pyshark
-!pip install gradio
-!apt-get update
-!apt-get install -y tshark
-!tshark --version
-!pip install gradio requests scapy joblib pyshark
-
-from google.colab import drive
-drive.mount('/content/drive')
-
-import pandas as pd
-from sklearn.model_selection import train_test_split, cross_val_score
-from sklearn.ensemble import ExtraTreesClassifier
-from sklearn.metrics import classification_report
-import joblib
-import subprocess
-import time
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from webdriver_manager.chrome import ChromeDriverManager
-from selenium.webdriver.chrome.options import Options
-import pyshark
-import numpy as np
-
-file_paths = [
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Afternoon-PortScan.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Friday-WorkingHours-Morning.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Monday-WorkingHours.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Thursday-WorkingHours-Afternoon-Infilteration.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Thursday-WorkingHours-Morning-WebAttacks.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Tuesday-WorkingHours.pcap_ISCX.csv',
-    '/content/drive/MyDrive/Colab Notebooks/link1/Wednesday-workingHours.pcap_ISCX.csv'
-]
-
-# Combine all files into a single DataFrame
-df = pd.concat([pd.read_csv(file) for file in file_paths], ignore_index=True)
-
-# Strip any leading or trailing spaces from column names
-df.columns = df.columns.str.strip()
-
-# Print the first few rows and column names to verify
-print("Columns in DataFrame:")
-print(df.columns)
-
-# Check if 'Label' exists
-if 'Label' not in df.columns:
-    print("Error: 'Label' column not found in the dataset.")
-else:
-    # Proceed with mapping the labels to "benign" or "malicious"
-    label_mapping = {
-        'BENIGN': 'benign',
-        'DDoS': 'malicious',
-        'PortScan': 'malicious',
-        'Bot': 'malicious',
-        'Infiltration': 'malicious',
-        'Web Attack': 'malicious',
-        # Add other malicious classes here if necessary
-    }
-
-    # Map the labels and fill missing values with 'malicious'
-    df['Label'] = df['Label'].map(label_mapping).fillna('malicious')
-
-# Convert categorical labels to numerical
-df['Label'] = df['Label'].astype('category').cat.codes
-
-# Define features and target
-all_features = df.columns.drop('Label')
-features = df[all_features]
-target = df['Label']
-
-# Print first few rows of the processed DataFrame
-print(df.head())
-print(df.columns)
-print(f"Features columns: {features.columns}")
-print(f"Target unique values: {target.unique()}")
-
-from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler
-print(f"Missing values in features:\n{features.isnull().sum()}")
-print(f"Missing values in target:\n{target.isnull().sum()}")
-print(f"Infinites in features:\n{np.isinf(features).sum()}")
-
-# Replace infinite values with NaN
-features.replace([np.inf, -np.inf], np.nan, inplace=True)
-
-# Handle missing values: Impute with the mean (for numerical features)
-imputer = SimpleImputer(strategy='mean')
-features_imputed = imputer.fit_transform(features)
-
-# Normalize features to handle large values
-scaler = StandardScaler()
-features_scaled = scaler.fit_transform(features_imputed)
-
-# Split data into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(features_imputed, target, test_size=0.3, random_state=42, stratify=target)
-
-# Initialize and train the Extra Trees model
-model = ExtraTreesClassifier(n_estimators=100, random_state=42)
-model.fit(X_train, y_train)
-
-y_pred = model.predict(X_test)
-print(classification_report(y_test, y_pred))
-from sklearn.metrics import accuracy_score, classification_report
-train_predictions = model.predict(X_train)
-test_predictions = model.predict(X_test)
-
-train_accuracy = accuracy_score(y_train, train_predictions)
-test_accuracy = accuracy_score(y_test, test_predictions)
-
-print(f"Training Accuracy: {train_accuracy:.4f}")
-print(f"Testing Accuracy: {test_accuracy:.4f}")
-
-print("Classification Report (Test Data):")
-print(classification_report(y_test, test_predictions))
-
-# Save the model and feature names
-joblib.dump(model, 'extratrees.pkl')
-joblib.dump(all_features.tolist(), 'featurenames.pkl')
-import joblib
-
-# Load the model and feature names
-loaded_model = joblib.load('extratrees.pkl')
-loaded_features = joblib.load('featurenames.pkl')
-
-# Check if they are loaded successfully
-print(f"Model Loaded: {loaded_model is not None}")
-print(f"Features Loaded: {loaded_features is not None}")
-
-# prompt: print different styles and new styles for the classification report\
-import matplotlib.pyplot as plt
-import seaborn as sns
-from sklearn.metrics import classification_report
-def plot_classification_report_styled(y_true, y_pred):
-    report = classification_report(y_true, y_pred, output_dict=True)
-    df_report = pd.DataFrame(report).transpose()
-
-    # Style the DataFrame with different colors and formatting
-    styled_report = df_report.style.background_gradient(cmap='viridis', axis=None) \
-        .highlight_max(color='lightgreen', axis=0) \
-        .highlight_min(color='lightcoral', axis=0) \
-        .format('{:.2f}')
-
-    # Display the styled report
-    display(styled_report)
-
-
-# Use the new function to display a styled classification report
-plot_classification_report_styled(y_test, y_pred)
-
-
-# Alternative Styling using Seaborn and Matplotlib with customization
-
-def plot_classification_report_seaborn_styled(y_true, y_pred):
-    report = classification_report(y_true, y_pred, output_dict=True)
-    df_report = pd.DataFrame(report).transpose()
-    plt.figure(figsize=(10, 6))
-    sns.heatmap(df_report[['precision', 'recall', 'f1-score']], annot=True, fmt=".2f", cmap="YlGnBu", linewidths=.5, annot_kws={"size": 12})
-    plt.title("Classification Report Heatmap", fontsize=16)
-    plt.xlabel("Metrics", fontsize=14)
-    plt.ylabel("Classes", fontsize=14)
-    plt.xticks(fontsize=12)
-    plt.yticks(fontsize=12)
-    plt.show()
-
-plot_classification_report_seaborn_styled(y_test, y_pred)
-
 import time
 import subprocess
 import pyshark
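
Note on the removed training block: it fits a StandardScaler but then splits and trains on the unscaled features_imputed, and only the classifier and the feature-name list are pickled for the serving script that remains. A minimal sketch of an alternative packaging, assuming the imputation and scaling were meant to be reproduced at prediction time (hypothetical, not part of this commit; the toy DataFrame and the file name extratrees_pipeline.pkl are illustrative only):

import numpy as np
import pandas as pd
import joblib
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import ExtraTreesClassifier

# Toy stand-in for the CICIDS2017 feature matrix; the real notebook builds
# `features` and `target` from the eight CSVs listed above.
features = pd.DataFrame({
    'Flow Duration': [1.0, np.nan, 3.0, 4.0, 5.0, 6.0],
    'Total Fwd Packets': [10, 20, np.inf, 40, 50, 60],
})
features = features.replace([np.inf, -np.inf], np.nan)
target = pd.Series([0, 1, 0, 1, 0, 1])

# Bundle imputation, scaling, and the classifier into one artifact so the
# serving side needs a single joblib.load() and cannot skip a preprocessing step.
pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', StandardScaler()),
    ('clf', ExtraTreesClassifier(n_estimators=100, random_state=42)),
])
pipeline.fit(features, target)
joblib.dump(pipeline, 'extratrees_pipeline.pkl')
print(pipeline.predict(features[:2]))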
@@ -191,8 +13,8 @@ import requests
 import gradio as gr
 
 # Load the pre-trained model and feature names
-model = joblib.load('/content/drive/MyDrive/Colab Notebooks/link1/extratrees.pkl')
-all_features = joblib.load('/content/drive/MyDrive/Colab Notebooks/link1/featurenames.pkl')
+model = joblib.load('extratrees.pkl')
+all_features = joblib.load('featurenames.pkl')
 
 # Modify the capture duration to a longer period
 def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
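
The two added lines assume extratrees.pkl and featurenames.pkl sit in the process's current working directory rather than on a mounted Drive. A small defensive variant of that loading step, sketched under the same assumption (the load_artifact helper is hypothetical), resolves the files relative to the script and fails with an explicit message when an artifact was not uploaded:

from pathlib import Path
import joblib

# Resolve the pickled artifacts next to this script rather than relying on
# whatever directory the process happens to be launched from.
BASE_DIR = Path(__file__).resolve().parent

def load_artifact(filename):
    path = BASE_DIR / filename
    if not path.is_file():
        raise FileNotFoundError(f"{filename} is missing; upload it next to the script")
    return joblib.load(path)

model = load_artifact('extratrees.pkl')
all_features = load_artifact('featurenames.pkl')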
@@ -352,36 +174,3 @@ iface = gr.Interface(
 # Launch the interface
 iface.launch(debug=True)
 
-import matplotlib.pyplot as plt
-import numpy as np
-
-# Sample data extracted from captured packets
-# These would come from the extracted packet features
-tcp_counts = 20  # Number of TCP packets
-udp_counts = 10  # Number of UDP packets
-packet_sizes = [60, 150, 300, 450, 500, 700, 900, 1100, 1400, 1600]  # Example packet sizes in bytes
-timestamps = np.linspace(0, 30, len(packet_sizes))  # Sample timestamps over 30 seconds
-
-# Create a new figure
-plt.figure(figsize=(10, 6))
-
-# Plot TCP and UDP packet counts in a bar chart
-plt.subplot(2, 1, 1)  # 2 rows, 1 column, first plot
-plt.bar(['TCP', 'UDP'], [tcp_counts, udp_counts], color=['blue', 'orange'])
-plt.title('TCP vs UDP Packet Counts')
-plt.xlabel('Protocol')
-plt.ylabel('Packet Count')
-
-# Plot packet sizes over time
-plt.subplot(2, 1, 2)  # 2 rows, 1 column, second plot
-plt.plot(timestamps, packet_sizes, marker='o', color='green')
-plt.title('Packet Sizes over Time')
-plt.xlabel('Time (s)')
-plt.ylabel('Packet Size (bytes)')
-
-# Adjust layout to prevent overlap
-plt.tight_layout()
-
-# Display the plots
-plt.show()
-
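
The deleted plotting cell charted hard-coded sample values (tcp_counts = 20, udp_counts = 10, example packet sizes) rather than anything measured from the capture. A sketch of producing the same two charts from an actual capture with pyshark follows, assuming a capture.pcap written by capture_packets() exists and tshark is installed; the packet attributes used are pyshark's standard transport_layer, sniff_timestamp, and length fields:

import matplotlib.pyplot as plt
import pyshark

# Tally protocol counts, packet sizes, and relative timestamps from the capture
# written by capture_packets(). Assumes capture.pcap is present.
cap = pyshark.FileCapture('capture.pcap')
tcp_count, udp_count = 0, 0
packet_sizes, timestamps = [], []
start = None
for pkt in cap:
    if pkt.transport_layer == 'TCP':
        tcp_count += 1
    elif pkt.transport_layer == 'UDP':
        udp_count += 1
    ts = float(pkt.sniff_timestamp)
    start = ts if start is None else start
    timestamps.append(ts - start)
    packet_sizes.append(int(pkt.length))
cap.close()

plt.figure(figsize=(10, 6))
plt.subplot(2, 1, 1)
plt.bar(['TCP', 'UDP'], [tcp_count, udp_count], color=['blue', 'orange'])
plt.title('TCP vs UDP Packet Counts')
plt.ylabel('Packet Count')
plt.subplot(2, 1, 2)
plt.plot(timestamps, packet_sizes, marker='o', color='green')
plt.title('Packet Sizes over Time')
plt.xlabel('Time (s)')
plt.ylabel('Packet Size (bytes)')
plt.tight_layout()
plt.show()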