web-phishing-detection

Sleeping

App Files Files Community

rmdhirr committed on Jun 16, 2024

Commit

e5bfa3c

verified ·

1 Parent(s): 10614c0

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -7

app.py CHANGED Viewed

@@ -66,19 +66,19 @@ with open('label_encoder.pkl', 'rb') as file:
 def predict_phishing(url, html):
     """Classify a URL / HTML pair with the loaded model.

     Both inputs are cleaned with ``preprocess_url`` / ``preprocess_html``,
     tokenized with their respective tokenizers, padded (and truncated) at
     the end to ``max_url_length`` / ``max_html_length``, and fed to
     ``model`` as a two-input batch of size one.

     Args:
         url: The URL text to classify.
         html: The HTML text that accompanies the URL.

     Returns:
         A ``(category, probability)`` tuple of strings: the capitalized
         label decoded by ``label_encoder`` and the raw model score
         formatted with four decimal places.
     """
     cleaned_url = preprocess_url(url)
     cleaned_html = preprocess_html(html)

     # Each tokenizer gets a one-element list so the result is a batch of one.
     url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
     url_padded = pad_sequences(url_sequences, maxlen=max_url_length, padding='post', truncating='post')
     html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
     html_padded = pad_sequences(html_sequences, maxlen=max_html_length, padding='post', truncating='post')

     # The [0][0] indexing below assumes a 2-D output with one score per sample.
     probabilities = model.predict([url_padded, html_padded])

     # Flatten before decoding: inverse_transform expects a 1-D array of
     # encoded labels (presumably a scikit-learn LabelEncoder, which only
     # accepts a column vector with a DataConversionWarning).
     predicted_labels = (probabilities > 0.6).astype(int).ravel()  # Adjust threshold if needed
     predicted_category = label_encoder.inverse_transform(predicted_labels)[0]

     # NOTE: this is the raw model score for the sample, reported as-is
     # regardless of which category the threshold selected.
     predicted_probability = f"{probabilities[0][0]:.4f}"
     return predicted_category.capitalize(), predicted_probability
 # Create Gradio Interface
@@ -86,14 +86,17 @@ interface = gr.Interface(
     fn=predict_phishing,
     inputs=[
         gr.components.Textbox(label="URL"),
-        gr.components.Textbox(label="HTML Snippet")
     ],
     outputs=[
         gr.components.Textbox(label="Predicted Category"),
         gr.components.Textbox(label="Predicted Probability")
     ],
     title="Phishing Detection Model",
-    description="Enter a URL and its HTML content to predict if it's spam or legitimate."
 )
 # Launch the Gradio interface

 def predict_phishing(url, html):
     """Classify a URL / HTML pair with the loaded model.

     Both inputs are cleaned with ``preprocess_url`` / ``preprocess_html``,
     tokenized with their respective tokenizers, padded (and truncated) at
     the end to ``max_url_length`` / ``max_html_length``, and fed to
     ``model`` as a two-input batch of size one.

     Args:
         url: The URL text to classify.
         html: The HTML text that accompanies the URL.

     Returns:
         A ``(category, probability)`` tuple of strings: the capitalized
         label decoded by ``label_encoder`` and the raw model score
         formatted with four decimal places.
     """
     cleaned_url = preprocess_url(url)
     cleaned_html = preprocess_html(html)

     # Each tokenizer gets a one-element list so the result is a batch of one.
     url_sequences = url_tokenizer.texts_to_sequences([cleaned_url])
     url_padded = pad_sequences(url_sequences, maxlen=max_url_length, padding='post', truncating='post')
     html_sequences = html_tokenizer.texts_to_sequences([cleaned_html])
     html_padded = pad_sequences(html_sequences, maxlen=max_html_length, padding='post', truncating='post')

     # The [0][0] indexing below assumes a 2-D output with one score per sample.
     probabilities = model.predict([url_padded, html_padded])

     # Flatten before decoding: inverse_transform expects a 1-D array of
     # encoded labels (presumably a scikit-learn LabelEncoder, which only
     # accepts a column vector with a DataConversionWarning).
     predicted_labels = (probabilities > 0.6).astype(int).ravel()  # Adjust threshold if needed
     predicted_category = label_encoder.inverse_transform(predicted_labels)[0]

     # NOTE: this is the raw model score for the sample, reported as-is
     # regardless of which category the threshold selected.
     predicted_probability = f"{probabilities[0][0]:.4f}"
     return predicted_category.capitalize(), predicted_probability
 # Create Gradio Interface
     fn=predict_phishing,
     inputs=[
         gr.components.Textbox(label="URL"),
+        gr.components.Textbox(label="HTML Snippet", lines=10, placeholder="Paste HTML content here")
     ],
     outputs=[
         gr.components.Textbox(label="Predicted Category"),
         gr.components.Textbox(label="Predicted Probability")
     ],
     title="Phishing Detection Model",
+    description="Enter a URL and its HTML content to predict if it's spam or legitimate. It's recommended to provide both for accurate results.",
+    theme="huggingface",
+    live=True,
+    css=".interface-container { border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; }"
 )
 # Launch the Gradio interface