Spaces:

srinuksv
/

project-final

Sleeping

File size: 6,715 Bytes

a9280ab

import time
import subprocess
import pyshark
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import numpy as np
import joblib
import pandas as pd
import scapy.all as scapy
import requests
import gradio as gr

# Load the pre-trained model and feature names once, at import time.
# Both paths are relative, so they resolve against the current working directory.
# NOTE(review): joblib artifacts are pickles and execute code when loaded --
# only load files from a trusted source.
model = joblib.load('extratrees.pkl')
all_features = joblib.load('featurenames.pkl')

# Modify the capture duration to a longer period
def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
    """Record the traffic produced by opening *url* in headless Chrome.

    tshark is started on all interfaces with a capture filter for TCP ports
    80/443 and port 53, the page is loaded with Selenium, and after
    ``capture_duration`` seconds the capture is stopped and read back with
    pyshark.

    Args:
        url: Address to open in the browser.
        capture_duration: Seconds to keep the browser open after the page
            load is issued.
        capture_file: Path of the pcap file tshark writes and pyshark reads.

    Returns:
        A list with the string representation of every captured packet, or
        ``None`` if any step failed (the error is printed).
    """
    try:
        # Discard tshark's console output: the pipes were never read, and an
        # unread PIPE can block the child once the OS pipe buffer fills up.
        tshark_process = subprocess.Popen(
            ["tshark", "-i", "any", "-f", "tcp port 80 or tcp port 443 or port 53", "-w", capture_file],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
        )
        try:
            # Wait for tshark to start
            time.sleep(2)

            # Set up Chrome options
            chrome_options = Options()
            chrome_options.add_argument("--headless")  # Run Chrome in headless mode
            chrome_options.add_argument("--no-sandbox")
            chrome_options.add_argument("--disable-dev-shm-usage")

            # Use Selenium to visit the URL
            service = Service(ChromeDriverManager().install())  # Ensure the driver is installed
            driver = webdriver.Chrome(service=service, options=chrome_options)
            try:
                driver.get(url)
                # Capture packets for the specified duration
                time.sleep(capture_duration)
            finally:
                # Always close the browser, even when the page load fails.
                driver.quit()
        finally:
            # Always stop tshark so a failed run does not leave it capturing.
            tshark_process.terminate()
            try:
                tshark_process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                tshark_process.kill()
                tshark_process.wait()

        # Read captured packets using pyshark for detailed packet information
        cap = pyshark.FileCapture(capture_file)
        try:
            packets = [str(packet) for packet in cap]
        finally:
            cap.close()
        return packets
    except Exception as e:
        print(f"Error in capturing packets: {e}")
        return None

# Function to extract features from captured packets
def _tcp_flag_names(raw_flags):
    """Return which of SYN/ACK/FIN/RST are set in a ``tcp.flags`` field value."""
    masks = {'FIN': 0x01, 'SYN': 0x02, 'RST': 0x04, 'ACK': 0x10}
    text = str(raw_flags)
    try:
        # The field is normally a hex bitmask such as "0x0012".
        bits = int(text, 16)
    except ValueError:
        # Not a bitmask: fall back to looking for the flag names in the text.
        return {name for name in masks if name in text}
    return {name for name, mask in masks.items() if bits & mask}


def extract_features(capture_file):
    """Compute flow-level statistics from a pcap file.

    Every name in the module-level ``all_features`` starts at 0; the
    statistics computed here are then written over / added to that mapping.

    Args:
        capture_file: Path of the pcap file to read with pyshark.

    Returns:
        A dict mapping feature name to value, or ``None`` if reading or
        processing the capture failed (the error is printed).
    """
    try:
        cap = pyshark.FileCapture(capture_file)
        try:
            # Initialize features
            features = {feature: 0 for feature in all_features}
            total_packets = 0
            total_bytes = 0
            start_time = None
            end_time = None
            packet_lengths = []
            protocol_counts = {'TCP': 0, 'UDP': 0, 'ICMP': 0}
            tcp_flags = {'SYN': 0, 'ACK': 0, 'FIN': 0, 'RST': 0}

            for packet in cap:
                length = int(packet.length)
                total_packets += 1
                total_bytes += length
                packet_lengths.append(length)
                timestamp = float(packet.sniff_time.timestamp())

                if start_time is None:
                    start_time = timestamp
                end_time = timestamp

                # Counting protocols and flags
                if hasattr(packet, 'tcp'):
                    protocol_counts['TCP'] += 1
                    for flag in _tcp_flag_names(packet.tcp.flags):
                        tcp_flags[flag] += 1
                elif hasattr(packet, 'udp'):
                    protocol_counts['UDP'] += 1
                elif hasattr(packet, 'icmp'):
                    protocol_counts['ICMP'] += 1
        finally:
            # Release the capture even when a packet fails to parse.
            cap.close()

        # Both timestamps are set together, so one None check covers the
        # empty-capture case.
        duration = end_time - start_time if start_time is not None else 0

        # Populate extracted features
        features.update({
            "Flow Duration": duration,
            "Total Packets": total_packets,
            "Total Bytes": total_bytes,
            "Fwd Packet Length Mean": np.mean(packet_lengths) if packet_lengths else 0,
            "Bwd Packet Length Mean": 0,  # Assuming no distinction here
            "Flow Bytes/s": total_bytes / duration if duration else 0,
            "Flow Packets/s": total_packets / duration if duration else 0,
            "Average Packet Size": np.mean(packet_lengths) if packet_lengths else 0,
            "Min Packet Size": min(packet_lengths) if packet_lengths else 0,
            "Max Packet Size": max(packet_lengths) if packet_lengths else 0,
            "Packet Length Variance": np.var(packet_lengths) if len(packet_lengths) > 1 else 0,
            "TCP Packets": protocol_counts['TCP'],
            "UDP Packets": protocol_counts['UDP'],
            "ICMP Packets": protocol_counts['ICMP'],
            "TCP SYN Flags": tcp_flags['SYN'],
            "TCP ACK Flags": tcp_flags['ACK'],
            "TCP FIN Flags": tcp_flags['FIN'],
            "TCP RST Flags": tcp_flags['RST']
        })

        return features
    except Exception as e:
        print(f"Error in extracting features: {e}")
        return None

# Function to compare features with CIC-IDS-2017 dataset
def compare_with_dataset(packet_features):
    """Classify one set of extracted features with the loaded model.

    The features are aligned to the order of the module-level
    ``all_features``; names missing from *packet_features* are filled with 0
    and names not in ``all_features`` are dropped by the reindex.

    Args:
        packet_features: Mapping of feature name to value.

    Returns:
        ``"benign"`` when the model predicts 0, otherwise ``"malicious"``.
    """
    aligned = pd.Series(packet_features).reindex(all_features, fill_value=0)
    # The model receives a single-row batch and the first label is taken.
    label = model.predict([aligned])[0]
    if label == 0:
        return "benign"
    return "malicious"

# Analyze the URL and predict if it's malicious
def analyze_url(url):
    """Fetch *url*, classify the traffic it generated and return packet dumps.

    A background Scapy sniffer is started before the HTTP request so that
    the captured packets include the traffic the request produces. The
    packets are written to ``capture.pcap``, turned into features with
    ``extract_features`` and classified with ``compare_with_dataset``.
    ``capture_packets`` is then run for a second, browser-driven capture.

    Args:
        url: Address to request and analyse.

    Returns:
        A ``(prediction, details)`` tuple. On success ``details`` is a dict
        with ``"scapy_packets"`` and ``"http_dns_packets"``; on failure the
        first element is an error message and ``details`` is ``[]``.
    """
    sniff_timeout = 10    # seconds before the sniffer gives up on reaching 100 packets
    request_timeout = 10  # seconds before the HTTP request is abandoned
    try:
        # Sniff in the background: a blocking sniff() after the request
        # would only see unrelated traffic, and could block indefinitely
        # without a timeout.
        sniffer = scapy.AsyncSniffer(count=100, timeout=sniff_timeout)
        sniffer.start()
        # Give the sniffer thread a moment to open its socket.
        time.sleep(1)
        try:
            requests.get(url, timeout=request_timeout)
        finally:
            # Returns once 100 packets were seen or the timeout elapsed.
            sniffer.join()
        packets = sniffer.results
        if not packets:
            return "No packets captured", []

        capture_file = 'capture.pcap'
        scapy.wrpcap(capture_file, packets)

        # Extract features from the captured packets
        packet_features = extract_features(capture_file)
        if packet_features is None:
            return "Error in feature extraction", []

        prediction = compare_with_dataset(packet_features)

        # Use Pyshark to capture HTTP/HTTPS/DNS packet details
        http_dns_packets = capture_packets(url)

        captured_packets = [str(packet) for packet in packets]
        return prediction, {"scapy_packets": captured_packets, "http_dns_packets": http_dns_packets}
    except Exception as e:
        return str(e), []

# Define the Gradio interface: one URL textbox as input; analyze_url's two
# return values feed the prediction textbox and the JSON packet view.
iface = gr.Interface(
    fn=analyze_url,
    inputs=gr.Textbox(label="Enter URL"),
    outputs=[gr.Textbox(label="Prediction"), gr.JSON(label="Captured Packets")],
    title="URL Malicious Activity Detection",
    description="Enter a URL to predict if it's malicious or benign by analyzing the network traffic."
)

# Launch the interface. The call is not behind an `if __name__ == "__main__"`
# guard, so it also runs whenever this module is imported.
iface.launch(debug=True)