Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -66,19 +66,19 @@ with open('label_encoder.pkl', 'rb') as file:
|
|
66 |
def predict_phishing(url, html):
    """Classify a URL / HTML pair using the module-level model.

    Both inputs are cleaned with ``preprocess_url`` / ``preprocess_html``,
    converted to token sequences with their respective tokenizers, and
    padded or truncated (at the end) to ``max_url_length`` /
    ``max_html_length`` before being passed together to ``model.predict``.

    Args:
        url: URL text to classify.
        html: HTML text that accompanies the URL.

    Returns:
        A ``(category, probability)`` tuple of strings: the label decoded by
        ``label_encoder`` with its first letter capitalized, and the first
        model output value formatted to four decimal places.
        NOTE(review): the reported value is the raw model score, not the
        confidence of whichever label was chosen -- confirm this is intended.
    """
    cleaned_url = preprocess_url(url)
    cleaned_html = preprocess_html(html)

    new_url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
    new_url_padded = pad_sequences(
        new_url_sequences, maxlen=max_url_length, padding='post', truncating='post'
    )

    new_html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
    new_html_padded = pad_sequences(
        new_html_sequences, maxlen=max_html_length, padding='post', truncating='post'
    )

    new_predictions_prob = model.predict([new_url_padded, new_html_padded])
    # Scores above 0.6 become encoded class 1; adjust the threshold if needed.
    # The model output is 2-D (it is indexed with [0][0] below), so flatten
    # the thresholded array: the label encoder expects a 1-D array of encoded
    # labels, not a column vector.
    new_predictions = (new_predictions_prob > 0.6).astype(int).ravel()

    predicted_category = label_encoder.inverse_transform(new_predictions)[0]
    predicted_probability = f"{new_predictions_prob[0][0]:.4f}"

    return predicted_category.capitalize(), predicted_probability
|
83 |
|
84 |
# Create Gradio Interface
|
@@ -86,14 +86,17 @@ interface = gr.Interface(
|
|
86 |
fn=predict_phishing,
|
87 |
inputs=[
|
88 |
gr.components.Textbox(label="URL"),
|
89 |
-
gr.components.Textbox(label="HTML Snippet")
|
90 |
],
|
91 |
outputs=[
|
92 |
gr.components.Textbox(label="Predicted Category"),
|
93 |
gr.components.Textbox(label="Predicted Probability")
|
94 |
],
|
95 |
title="Phishing Detection Model",
|
96 |
-
description="Enter a URL and its HTML content to predict if it's spam or legitimate."
|
|
|
|
|
|
|
97 |
)
|
98 |
|
99 |
# Launch the Gradio interface
|
|
|
66 |
def predict_phishing(url, html):
    """Predict the category of a URL and its HTML content.

    The URL and HTML are each cleaned (``preprocess_url`` /
    ``preprocess_html``), tokenized with their own tokenizer, and
    post-padded / post-truncated to ``max_url_length`` and
    ``max_html_length``. The two padded arrays are then given to
    ``model.predict`` as a two-element input list.

    Args:
        url: URL text to classify.
        html: HTML text that accompanies the URL.

    Returns:
        A two-item tuple of strings ``(category, probability)``. ``category``
        is the ``label_encoder`` label for the thresholded prediction,
        capitalized; ``probability`` is the first model output value
        formatted with four decimals.
        NOTE(review): ``probability`` is the raw model score rather than the
        confidence of the returned label -- verify that is what the UI wants.
    """
    cleaned_url = preprocess_url(url)
    cleaned_html = preprocess_html(html)

    new_url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
    new_url_padded = pad_sequences(
        new_url_sequences, maxlen=max_url_length, padding='post', truncating='post'
    )

    new_html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
    new_html_padded = pad_sequences(
        new_html_sequences, maxlen=max_html_length, padding='post', truncating='post'
    )

    new_predictions_prob = model.predict([new_url_padded, new_html_padded])
    # Threshold at 0.6 (adjust if needed) to get the encoded class, then
    # flatten: the score array is 2-D (see the [0][0] index below) and the
    # label encoder should receive a 1-D array instead of a column vector.
    new_predictions = (new_predictions_prob > 0.6).astype(int).ravel()

    predicted_category = label_encoder.inverse_transform(new_predictions)[0]
    predicted_probability = f"{new_predictions_prob[0][0]:.4f}"

    return predicted_category.capitalize(), predicted_probability
|
83 |
|
84 |
# Create Gradio Interface
|
|
|
86 |
fn=predict_phishing,
|
87 |
inputs=[
|
88 |
gr.components.Textbox(label="URL"),
|
89 |
+
gr.components.Textbox(label="HTML Snippet", lines=10, placeholder="Paste HTML content here")
|
90 |
],
|
91 |
outputs=[
|
92 |
gr.components.Textbox(label="Predicted Category"),
|
93 |
gr.components.Textbox(label="Predicted Probability")
|
94 |
],
|
95 |
title="Phishing Detection Model",
|
96 |
+
description="Enter a URL and its HTML content to predict if it's spam or legitimate. It's recommended to provide both for accurate results.",
|
97 |
+
theme="huggingface",
|
98 |
+
live=True,
|
99 |
+
css=".interface-container { border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; }"
|
100 |
)
|
101 |
|
102 |
# Launch the Gradio interface
|