|
import re |
|
import streamlit as st |
|
from transformers import pipeline |
|
|
|
|
|
|
|
pipe1 = pipeline("text-classification", model="ElSlay/BERT-Phishing-Email-Model") |
|
pipe2 = pipeline("text-classification", model="Eason918/malicious-url-detector") |
|
pipe3 = pipeline("text-classification", model="r3ddkahili/final-complete-malicious-url-model") |
|
|
|
|
|
def normalize_label(label): |
|
return "benign" if label == "LABEL_0" else "malicious" |
|
|
|
|
|
def calculate_weighted_prediction(label2, score2, label3, score3): |
|
weights = {"Pipeline2": 0.3, "Pipeline3": 0.7} |
|
score_dict = {"benign": 0.0, "malicious": 0.0} |
|
score_dict[normalize_label(label2)] += weights["Pipeline2"] * score2 |
|
score_dict[normalize_label(label3)] += weights["Pipeline3"] * score3 |
|
final_label = max(score_dict, key=score_dict.get) |
|
final_score = score_dict[final_label] |
|
return final_label, final_score |
|
|
|
|
|
def extract_urls(text): |
|
url_pattern = r'(https?://[^\s]+|www\.[^\s]+)' |
|
return re.findall(url_pattern, text) |
|
|
|
|
|
st.set_page_config(page_title="π© Email Malicious Detector", layout="wide") |
|
st.markdown("<h1 style='text-align: center;'>π© Malicious Email Detection App</h1>", unsafe_allow_html=True) |
|
|
|
st.markdown("### βοΈ Enter your email content:") |
|
email_text = st.text_area("Paste your email content here:", height=200) |
|
|
|
if st.button("π¨ Scan Email & Analyze URL"): |
|
if not email_text.strip(): |
|
st.warning("β οΈ Please input some email content.") |
|
else: |
|
result1 = pipe1(email_text)[0] |
|
label1, score1 = result1['label'], result1['score'] |
|
pred1 = normalize_label(label1) |
|
|
|
if pred1 == "benign": |
|
st.markdown("## π‘οΈ **Prediction Result:**") |
|
st.success(f"β
BENIGN EMAIL CONTENT (Confidence Score: {score1:.2%})") |
|
else: |
|
urls = extract_urls(email_text) |
|
if not urls: |
|
st.warning("β οΈ Email content is malicious, but no URL found for further analysis.") |
|
else: |
|
url = urls[0] |
|
result2 = pipe2(url)[0] |
|
result3 = pipe3(url)[0] |
|
label2, score2 = result2['label'], result2['score'] |
|
label3, score3 = result3['label'], result3['score'] |
|
|
|
final_label, final_score = calculate_weighted_prediction(label2, score2, label3, score3) |
|
|
|
st.markdown("## π‘οΈ **Prediction Result:**") |
|
if final_score < 0.6: |
|
st.warning(f"π€ URLs in email content are UNCERTAIN - Confidence too low ({final_score:.2%}). Please review manually.") |
|
elif final_label == "benign": |
|
st.success(f"β
URLs in email content are BENIGN (Confidence Score: {final_score:.2%})") |
|
else: |
|
st.error(f"β οΈ URLs in email content are MALICIOUS (Confidence Score: {final_score:.2%})") |
|
|