Spaces:
Running
Running
File size: 3,729 Bytes
eb30cad b1ddb38 8cd35aa b1ddb38 eb30cad 2f8164c eb30cad 2f8164c eb30cad 2f8164c eb30cad b1ddb38 e2e2b90 b1ddb38 e2e2b90 b1ddb38 8cd35aa b1ddb38 eb30cad b1ddb38 eb30cad b1ddb38 eb30cad b1ddb38 eb30cad 3a6bb00 eb30cad 209780d 968005b 8cd35aa eb30cad 8cd35aa eb30cad 120d185 3d7830a 26ce0ac 3d7830a 120d185 2f8164c 120d185 eb30cad 8e5d4ef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import tensorflow as tf
import numpy as np
import nltk
import pickle
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import re
# Restore the saved Keras model from the working directory.
model = tf.keras.models.load_model('new_phishing_detection_model.keras')

# Re-compile with the standard loss and metrics. The optimizer and metric
# objects are created in the same order as they appear in the compile call.
_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
_metrics = [
    'accuracy',
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
]
model.compile(
    optimizer=_optimizer,
    loss='binary_crossentropy',
    metrics=_metrics,
)
# NLTK resources used by the preprocessing functions.
# 'punkt_tab' is fetched alongside 'punkt': newer NLTK releases make
# word_tokenize() look up the tokenizer data under that name and raise
# LookupError when it is missing.
# NOTE(review): confirm against the NLTK version pinned for this app.
for _resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(_resource)

# English stop words as a set, so the per-token membership tests are O(1).
STOPWORDS = set(stopwords.words('english'))
# Shared lemmatizer instance reused by every preprocessing call.
lemmatizer = WordNetLemmatizer()
def normalize_length(url, target_length=50):
    """Pad or truncate *url* to ``target_length`` characters.

    Strings shorter than ``target_length`` are right-padded with spaces;
    anything else is cut down to its first ``target_length`` characters.
    """
    # Slicing is a no-op for short strings and ljust is a no-op for strings
    # already at (or beyond) the requested width, so one expression covers
    # both cases.
    return url[:target_length].ljust(target_length)
def preprocess_url(url):
    """Turn a raw URL into a space-separated string of lemmatized tokens.

    The URL is lower-cased, the scheme and ``www.`` are removed, every
    non-alphanumeric character becomes a space, whitespace is collapsed,
    and the result is padded/truncated by ``normalize_length`` before
    tokenizing. Stop words are dropped and remaining tokens lemmatized.
    """
    cleaned = url.lower()
    # Applied in order: each substitution works on the previous result.
    substitutions = (
        (r'https?://', ''),
        (r'www\.', ''),
        (r'[^a-zA-Z0-9]', ' '),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = normalize_length(cleaned.strip())

    lemmas = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(cleaned)
        if token not in STOPWORDS
    ]
    return ' '.join(lemmas)
def preprocess_html(html):
    """Turn raw HTML into a space-separated string of lemmatized tokens.

    Tags are replaced by spaces, the text is lower-cased, ``http(s)://``
    prefixes are removed, non-alphanumeric characters become spaces and
    whitespace is collapsed. Stop words are dropped and the remaining
    tokens are lemmatized.
    """
    text = re.sub(r'<[^>]+>', ' ', html).lower()
    text = re.sub(r'https?://', '', text)
    text = re.sub(r'[^a-zA-Z0-9]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()

    lemmas = []
    for token in word_tokenize(text):
        if token in STOPWORDS:
            continue
        lemmas.append(lemmatizer.lemmatize(token))
    return ' '.join(lemmas)
# Padded sequence lengths passed to preprocess_input() for each input kind.
max_url_length = 180
max_html_length = 2000
# Not referenced by the code visible in this file.
max_words = 10000


def _load_tokenizer(path):
    """Unpickle and return the tokenizer stored at *path*."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Load tokenizers
url_tokenizer = _load_tokenizer('url_tokenizer.pkl')
html_tokenizer = _load_tokenizer('html_tokenizer.pkl')
def preprocess_input(input_text, tokenizer, max_length):
    """Encode one text with *tokenizer* and pad/truncate it to *max_length*.

    The text is wrapped in a single-element list before encoding, so the
    result of ``pad_sequences`` holds exactly one sequence. Padding and
    truncation are both applied at the end of the sequence ('post').
    """
    return pad_sequences(
        tokenizer.texts_to_sequences([input_text]),
        maxlen=max_length,
        padding='post',
        truncating='post',
    )
def get_prediction(input_text, input_type):
    """Run the model on *input_text* and return its first output value.

    The model is fed a two-element list ``[url_input, html_input]``. Only
    the slot matching *input_type* carries real data; the other slot is a
    zero-filled dummy. Any *input_type* other than ``"URL"`` is handled as
    HTML.

    Returns:
        ``model.predict(...)[0][0]`` -- presumably the phishing score for
        the single sample (TODO confirm against the model definition).
    """
    if input_type == "URL":
        url_input = preprocess_input(
            preprocess_url(input_text), url_tokenizer, max_url_length
        )
        html_input = np.zeros((1, max_html_length))  # dummy HTML input
    else:
        html_input = preprocess_input(
            preprocess_html(input_text), html_tokenizer, max_html_length
        )
        url_input = np.zeros((1, max_url_length))  # dummy URL input

    scores = model.predict([url_input, html_input])
    return scores[0][0]
def phishing_detection(input_text, input_type):
    """Classify a URL or HTML snippet and return a human-readable verdict.

    Args:
        input_text: Raw URL or HTML source entered by the user.
        input_type: ``"URL"`` or ``"HTML"``; selects the preprocessing path.

    Returns:
        A message string: a prompt when the text is empty or no valid
        input type was chosen, otherwise a "Warning"/"Safe" verdict that
        includes the model score formatted to two decimals.
    """
    # Blank input would otherwise be scored as an all-padding sequence (or
    # crash on None), producing a meaningless verdict.
    if not input_text or not input_text.strip():
        return "Please enter a URL or HTML code to analyze."

    # An unselected radio button arrives as None; without this check it
    # would silently be preprocessed as HTML.
    if input_type not in ("URL", "HTML"):
        return "Please select an input type (URL or HTML)."

    prediction = get_prediction(input_text, input_type)
    threshold = 0.5  # Adjusted threshold
    if prediction > threshold:
        return f"Warning: This site is likely a phishing site! ({prediction:.2f})"
    return f"Safe: This site is not likely a phishing site. ({prediction:.2f})"
# Widgets are created in the same order as before: text box, type selector,
# then the output box. The two inputs map positionally onto
# phishing_detection(input_text, input_type).
_text_input = gr.components.Textbox(lines=5, placeholder="Enter URL or HTML code")
_type_selector = gr.components.Radio(["URL", "HTML"], type="value", label="Input Type")
_result_box = gr.components.Textbox(label="Phishing Detection Result")

iface = gr.Interface(
    fn=phishing_detection,
    inputs=[_text_input, _type_selector],
    outputs=_result_box,
    title="Phishing Detection Model",
    description="Check if a URL or HTML is Phishing.",
    theme="default",
)

# Start the web UI as soon as the module is executed.
iface.launch()
|