import re
import streamlit as st
from transformers import pipeline

# ---------------- CONFIG ----------------
# Load the three Hugging Face models once and cache them so Streamlit reruns don't reload them
@st.cache_resource
def load_pipelines():
    email_pipe = pipeline("text-classification", model="ElSlay/BERT-Phishing-Email-Model")
    url_pipe_a = pipeline("text-classification", model="Eason918/malicious-url-detector")
    url_pipe_b = pipeline("text-classification", model="r3ddkahili/final-complete-malicious-url-model")
    return email_pipe, url_pipe_a, url_pipe_b

pipe1, pipe2, pipe3 = load_pipelines()
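# A text-classification pipeline returns a list of dicts, e.g. (illustrative output):
#   pipe1("some email text") -> [{'label': 'LABEL_0', 'score': 0.98}]
# which is why results are indexed with [0]['label'] and [0]['score'] below.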

# Label normalization: the classifiers emit LABEL_0 / LABEL_1, where LABEL_0 means benign
def normalize_label(label):
    return "benign" if label == "LABEL_0" else "malicious"

# Weighted ensemble of the two URL classifiers (pipe2 and pipe3 only; pipe1 handles the email body)
def calculate_weighted_prediction(label2, score2, label3, score3):
    weights = {"Pipeline2": 0.3, "Pipeline3": 0.7}
    score_dict = {"benign": 0.0, "malicious": 0.0}
    score_dict[normalize_label(label2)] += weights["Pipeline2"] * score2
    score_dict[normalize_label(label3)] += weights["Pipeline3"] * score3
    final_label = max(score_dict, key=score_dict.get)
    final_score = score_dict[final_label]
    return final_label, final_score
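# Worked example with illustrative scores (not real model outputs):
#   label2 = "LABEL_1" (malicious), score2 = 0.80 -> malicious += 0.3 * 0.80 = 0.24
#   label3 = "LABEL_0" (benign),    score3 = 0.90 -> benign    += 0.7 * 0.90 = 0.63
#   max(score_dict) picks ("benign", 0.63), which clears the 0.6 threshold used below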

# Extract URLs
def extract_urls(text):
    url_pattern = r'(https?://[^\s]+|www\.[^\s]+)'
    return re.findall(url_pattern, text)
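# Example (illustrative): extract_urls("see http://example.com and www.test.org/login")
#   -> ["http://example.com", "www.test.org/login"]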

# ---------------- UI START ----------------
st.set_page_config(page_title="📩 Email Malicious Detector", layout="wide")
st.markdown("<h1 style='text-align: center;'>📩 Malicious Email Detection App</h1>", unsafe_allow_html=True)

st.markdown("### ✉️ Enter your email content:")
email_text = st.text_area("Paste your email content here:", height=200)

if st.button("🚨 Scan Email & Analyze URL"):
    if not email_text.strip():
        st.warning("⚠️ Please input some email content.")
    else:
        # Stage 1: classify the email body itself
        result1 = pipe1(email_text)[0]
        label1, score1 = result1['label'], result1['score']
        pred1 = normalize_label(label1)

        if pred1 == "benign":
            st.success(f"✅ BENIGN EMAIL CONTENT (Confidence Score: {score1:.2%})")
        else:
            # Stage 2: the email looks malicious, so inspect any URLs it contains
            urls = extract_urls(email_text)
            if not urls:
                st.warning("⚠️ Email content is malicious, but no URL found for further analysis.")
            else:
                # Only the first extracted URL is passed to the URL classifiers
                url = urls[0]
                result2 = pipe2(url)[0]
                result3 = pipe3(url)[0]
                label2, score2 = result2['label'], result2['score']
                label3, score3 = result3['label'], result3['score']

                final_label, final_score = calculate_weighted_prediction(label2, score2, label3, score3)

                st.markdown("## 🛡️ **Prediction Result:**")
                if final_score < 0.6:
                    st.warning(f"🤔 URLs in email content are UNCERTAIN - Confidence too low ({final_score:.2%}). Please review manually.")
                elif final_label == "benign":
                    st.success(f"✅ URLs in email content are BENIGN (Confidence Score: {final_score:.2%})")
                else:
                    st.error(f"⚠️ URLs in email content are MALICIOUS (Confidence Score: {final_score:.2%})")