Create app.py
app.py
ADDED
@@ -0,0 +1,175 @@
import time
import subprocess
import pyshark
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options
import numpy as np
import joblib
import pandas as pd
import scapy.all as scapy
import requests
import gradio as gr

# Load the pre-trained model and feature names
model = joblib.load('extratrees.pkl')
all_features = joblib.load('featurenames.pkl')

# Capture packets with tshark while a headless browser visits the URL
def capture_packets(url, capture_duration=30, capture_file="capture.pcap"):
    try:
        # Start tshark to capture HTTP/HTTPS/DNS traffic
        tshark_process = subprocess.Popen(
            ["tshark", "-i", "any", "-f", "tcp port 80 or tcp port 443 or port 53", "-w", capture_file],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
        # Wait for tshark to start
        time.sleep(2)

        # Set up Chrome options
        chrome_options = Options()
        chrome_options.add_argument("--headless")  # Run Chrome in headless mode
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")

        # Use Selenium to visit the URL
        service = Service(ChromeDriverManager().install())  # Ensure the driver is installed
        driver = webdriver.Chrome(service=service, options=chrome_options)
        driver.get(url)

        # Capture packets for the specified duration
        time.sleep(capture_duration)

        # Close the browser
        driver.quit()

        # Stop tshark
        tshark_process.terminate()
        tshark_process.wait()

        # Read captured packets using pyshark for detailed packet information
        packets = []
        cap = pyshark.FileCapture(capture_file)
        for packet in cap:
            packets.append(str(packet))
        cap.close()
        return packets
    except Exception as e:
        print(f"Error in capturing packets: {e}")
        return None

# Function to extract features from captured packets
def extract_features(capture_file):
    try:
        cap = pyshark.FileCapture(capture_file)

        # Initialize features
        features = {feature: 0 for feature in all_features}
        total_packets = 0
        total_bytes = 0
        start_time = None
        end_time = None
        packet_lengths = []
        protocol_counts = {'TCP': 0, 'UDP': 0, 'ICMP': 0}
        tcp_flags = {'SYN': 0, 'ACK': 0, 'FIN': 0, 'RST': 0}

        for packet in cap:
            total_packets += 1
            total_bytes += int(packet.length)
            packet_lengths.append(int(packet.length))
            timestamp = float(packet.sniff_time.timestamp())

            if start_time is None:
                start_time = timestamp
            end_time = timestamp  # Keep updating so end_time is the last packet's time

            # Count protocols and TCP flags. pyshark exposes tcp.flags as a
            # hex string (e.g. '0x0012'), so test the individual flag bits
            # rather than searching for flag names in the string.
            if hasattr(packet, 'tcp'):
                protocol_counts['TCP'] += 1
                flags = int(packet.tcp.flags, 16)
                if flags & 0x02:
                    tcp_flags['SYN'] += 1
                if flags & 0x10:
                    tcp_flags['ACK'] += 1
                if flags & 0x01:
                    tcp_flags['FIN'] += 1
                if flags & 0x04:
                    tcp_flags['RST'] += 1
            elif hasattr(packet, 'udp'):
                protocol_counts['UDP'] += 1
            elif hasattr(packet, 'icmp'):
                protocol_counts['ICMP'] += 1
        cap.close()

        duration = end_time - start_time if start_time is not None and end_time is not None else 0

        # Populate extracted features
        features.update({
            "Flow Duration": duration,
            "Total Packets": total_packets,
            "Total Bytes": total_bytes,
            "Fwd Packet Length Mean": np.mean(packet_lengths) if packet_lengths else 0,
            "Bwd Packet Length Mean": 0,  # No forward/backward distinction is made here
            "Flow Bytes/s": total_bytes / duration if duration else 0,
            "Flow Packets/s": total_packets / duration if duration else 0,
            "Average Packet Size": np.mean(packet_lengths) if packet_lengths else 0,
            "Min Packet Size": min(packet_lengths) if packet_lengths else 0,
            "Max Packet Size": max(packet_lengths) if packet_lengths else 0,
            "Packet Length Variance": np.var(packet_lengths) if len(packet_lengths) > 1 else 0,
            "TCP Packets": protocol_counts['TCP'],
            "UDP Packets": protocol_counts['UDP'],
            "ICMP Packets": protocol_counts['ICMP'],
            "TCP SYN Flags": tcp_flags['SYN'],
            "TCP ACK Flags": tcp_flags['ACK'],
            "TCP FIN Flags": tcp_flags['FIN'],
            "TCP RST Flags": tcp_flags['RST']
        })

        return features
    except Exception as e:
        print(f"Error in extracting features: {e}")
        return None

# Compare extracted features against the model trained on CIC-IDS-2017
def compare_with_dataset(packet_features):
    # Reorder the extracted features into the column order the model expects
    packet_features_series = pd.Series(packet_features)
    packet_features_series = packet_features_series.reindex(all_features, fill_value=0)
    # Predict using the loaded model; wrapping the row in a DataFrame keeps
    # feature names aligned with those seen at training time
    prediction = model.predict(pd.DataFrame([packet_features_series]))[0]
    return "benign" if prediction == 0 else "malicious"

# Analyze the URL and predict if it's malicious
def analyze_url(url):
    try:
        # Sniff with Scapy in the background while the request is in flight,
        # so the request's own traffic is actually captured, then stop after
        # a short window instead of blocking on a fixed packet count
        sniffer = scapy.AsyncSniffer()
        sniffer.start()
        requests.get(url, timeout=10)
        time.sleep(5)  # Give responses time to arrive
        packets = sniffer.stop()

        capture_file = 'capture.pcap'
        scapy.wrpcap(capture_file, packets)

        # Extract features from the captured packets
        packet_features = extract_features(capture_file)
        if packet_features is not None:
            prediction = compare_with_dataset(packet_features)

            # Use pyshark to capture HTTP/HTTPS/DNS packet details
            http_dns_packets = capture_packets(url)

            captured_packets = [str(packet) for packet in packets]
            return prediction, {"scapy_packets": captured_packets, "http_dns_packets": http_dns_packets}
        else:
            return "Error in feature extraction", []
    except Exception as e:
        return str(e), []

# Define the Gradio interface
iface = gr.Interface(
    fn=analyze_url,
    inputs=gr.Textbox(label="Enter URL"),
    outputs=[gr.Textbox(label="Prediction"), gr.JSON(label="Captured Packets")],
    title="URL Malicious Activity Detection",
    description="Enter a URL to predict if it's malicious or benign by analyzing the network traffic."
)

# Launch the interface
iface.launch(debug=True)
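
A note on deployment: besides app.py, this Space needs the pickled model files ('extratrees.pkl' and 'featurenames.pkl') uploaded to the repo, the Python dependencies listed in requirements.txt, and the tshark binary installed at the system level (Hugging Face Spaces installs apt packages listed in packages.txt). Below is a minimal sketch of those two files; the exact package set is an assumption inferred from the imports above, not taken from the original repo.

requirements.txt
    pyshark
    selenium
    webdriver-manager
    numpy
    joblib
    pandas
    scapy
    requests
    gradio
    scikit-learn

packages.txt
    tshark
    chromium
    chromium-driver

scikit-learn is included because joblib needs it available to unpickle the ExtraTrees model; chromium and chromium-driver give Selenium a browser to drive inside the container. Note also that capturing on interface "any" typically requires elevated capture privileges, which a standard Space container may not grant.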