Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import streamlit as st
|
3 |
+
from transformers import pipeline
|
4 |
+
|
5 |
+
# ---------------- CONFIG ----------------
|
6 |
+
# Load models
|
7 |
+
@st.cache_resource(show_spinner=False)
def _load_pipelines():
    """Build the three text-classification pipelines once and reuse them.

    Streamlit re-executes this script from the top on every widget
    interaction; without caching, each rerun would construct all three
    pipelines again. ``st.cache_resource`` keeps a single shared instance
    for the lifetime of the server process.

    ``show_spinner=False`` avoids emitting a UI element here, because this
    function runs before ``st.set_page_config`` is called further down.

    Returns:
        A 3-tuple ``(email_pipeline, url_pipeline_a, url_pipeline_b)`` in the
        order the rest of the script binds to ``pipe1``, ``pipe2``, ``pipe3``.
    """
    return (
        pipeline("text-classification", model="ElSlay/BERT-Phishing-Email-Model"),
        pipeline("text-classification", model="Eason918/malicious-url-detector"),
        pipeline("text-classification", model="r3ddkahili/final-complete-malicious-url-model"),
    )


# pipe1 classifies the email body; pipe2 and pipe3 classify extracted URLs.
pipe1, pipe2, pipe3 = _load_pipelines()
|
10 |
+
|
11 |
+
# Label normalization
|
12 |
+
def normalize_label(label):
    """Translate a raw classifier label into "benign" or "malicious".

    Only the exact string "LABEL_0" maps to "benign"; any other value,
    including unrecognised labels, maps to "malicious".
    """
    if label == "LABEL_0":
        return "benign"
    return "malicious"
|
14 |
+
|
15 |
+
# Weighted Ensemble Calculation (only pipeline2 and 3)
|
16 |
+
def calculate_weighted_prediction(label2, score2, label3, score3):
    """Combine two classifier outputs into one weighted verdict.

    Each classifier adds ``weight * score`` to the class its label
    normalizes to; the first classifier is weighted 0.3 and the second 0.7.

    Args:
        label2: Raw label from the first URL classifier.
        score2: Score reported with ``label2``.
        label3: Raw label from the second URL classifier.
        score3: Score reported with ``label3``.

    Returns:
        ``(final_label, final_score)`` where ``final_label`` is "benign" or
        "malicious" and ``final_score`` is that class's accumulated weighted
        score. "benign" is returned when the two totals are equal.
    """
    totals = {"benign": 0.0, "malicious": 0.0}
    weighted_votes = (
        (label2, score2, 0.3),
        (label3, score3, 0.7),
    )
    for raw_label, confidence, weight in weighted_votes:
        totals[normalize_label(raw_label)] += weight * confidence

    benign_total = totals["benign"]
    malicious_total = totals["malicious"]
    # Strict comparison: "malicious" only wins when its total is larger.
    if malicious_total > benign_total:
        return "malicious", malicious_total
    return "benign", benign_total
|
24 |
+
|
25 |
+
# Extract URLs
|
26 |
+
def extract_urls(text):
    """Return the URLs found in *text*, in order of appearance.

    A URL is a whitespace-delimited run starting with ``http://``,
    ``https://`` or ``www.`` (matched case-insensitively). Trailing sentence
    punctuation and unbalanced closing brackets are trimmed so that
    ``"see http://a.com/x."`` yields ``"http://a.com/x"`` rather than a URL
    ending in a full stop. Balanced brackets inside the URL are kept.

    Args:
        text: Free-form text to scan. Empty or ``None`` yields no URLs.

    Returns:
        A list of URL strings; duplicates are preserved.
    """
    if not text:
        return []

    url_pattern = r'(https?://[^\s]+|www\.[^\s]+)'
    trailing_punctuation = ".,;:!?'\""
    openers_by_closer = {")": "(", "]": "[", "}": "{", ">": "<"}

    urls = []
    for candidate in re.findall(url_pattern, text, flags=re.IGNORECASE):
        while candidate:
            last = candidate[-1]
            if last in trailing_punctuation:
                candidate = candidate[:-1]
            elif (
                last in openers_by_closer
                and candidate.count(last) > candidate.count(openers_by_closer[last])
            ):
                # A closer with no matching opener belongs to the surrounding
                # prose, e.g. "(see www.example.com)".
                candidate = candidate[:-1]
            else:
                break
        if candidate:
            urls.append(candidate)
    return urls
|
29 |
+
|
30 |
+
# ---------------- UI START ----------------
|
31 |
+
# Minimum ensemble score required before a URL verdict is reported as
# definite; anything lower is shown as "uncertain".
CONFIDENCE_THRESHOLD = 0.6


def _score_url(url):
    """Classify *url* with both URL models and return the ensemble verdict."""
    # truncation=True clips over-long inputs to the model's maximum length
    # instead of letting the forward pass fail.
    result2 = pipe2(url, truncation=True)[0]
    result3 = pipe3(url, truncation=True)[0]
    return calculate_weighted_prediction(
        result2['label'], result2['score'], result3['label'], result3['score']
    )


def _severity(verdict):
    """Sort key ranking a (label, score) verdict; larger means more alarming."""
    label, score = verdict
    if score < CONFIDENCE_THRESHOLD:
        # Uncertain outranks benign; the least confident one is surfaced.
        return (1, -score)
    if label == "benign":
        return (0, -score)
    return (2, score)


def _worst_url_verdict(urls):
    """Return the most alarming ensemble verdict across every URL in *urls*."""
    return max((_score_url(url) for url in urls), key=_severity)


st.set_page_config(page_title="📩 Email Malicious Detector", layout="wide")
st.markdown("<h1 style='text-align: center;'>📩 Malicious Email Detection App</h1>", unsafe_allow_html=True)

st.markdown("### ✉️ Enter your email content:")
email_text = st.text_area("Paste your email content here:", height=200)

if st.button("🚨 Scan Email & Analyze URL"):
    if not email_text.strip():
        st.warning("⚠️ Please input some email content.")
    else:
        # Stage 1: classify the email body; long emails are truncated to the
        # model's input limit rather than raising.
        result1 = pipe1(email_text, truncation=True)[0]
        label1, score1 = result1['label'], result1['score']
        pred1 = normalize_label(label1)

        if pred1 == "benign":
            st.success(f"✅ BENIGN EMAIL CONTENT (Confidence Score: {score1:.2%})")
        else:
            # Stage 2: inspect every distinct URL, not just the first, so a
            # harmless leading link cannot mask a malicious one later on.
            urls = list(dict.fromkeys(extract_urls(email_text)))
            if not urls:
                st.warning("⚠️ Email content is malicious, but no URL found for further analysis.")
            else:
                final_label, final_score = _worst_url_verdict(urls)

                st.markdown("## 🛡️ **Prediction Result:**")
                if final_score < CONFIDENCE_THRESHOLD:
                    st.warning(f"🤔 URLs in email content are UNCERTAIN - Confidence too low ({final_score:.2%}). Please review manually.")
                elif final_label == "benign":
                    st.success(f"✅ URLs in email content are BENIGN (Confidence Score: {final_score:.2%})")
                else:
                    st.error(f"⚠️ URLs in email content are MALICIOUS (Confidence Score: {final_score:.2%})")
|