Eason918 committed on
Commit
79f5c90
·
verified ·
1 Parent(s): 0a7d055

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import streamlit as st
3
+ from transformers import pipeline
4
+
5
+ # ---------------- CONFIG ----------------
6
+ # Load models
7
+ pipe1 = pipeline("text-classification", model="ElSlay/BERT-Phishing-Email-Model")
8
+ pipe2 = pipeline("text-classification", model="Eason918/malicious-url-detector")
9
+ pipe3 = pipeline("text-classification", model="r3ddkahili/final-complete-malicious-url-model")
10
+
11
+ # Label normalization
12
+ def normalize_label(label):
13
+ return "benign" if label == "LABEL_0" else "malicious"
14
+
15
+ # Weighted Ensemble Calculation (only pipeline2 and 3)
16
+ def calculate_weighted_prediction(label2, score2, label3, score3):
17
+ weights = {"Pipeline2": 0.3, "Pipeline3": 0.7}
18
+ score_dict = {"benign": 0.0, "malicious": 0.0}
19
+ score_dict[normalize_label(label2)] += weights["Pipeline2"] * score2
20
+ score_dict[normalize_label(label3)] += weights["Pipeline3"] * score3
21
+ final_label = max(score_dict, key=score_dict.get)
22
+ final_score = score_dict[final_label]
23
+ return final_label, final_score
24
+
25
+ # Extract URLs
26
+ def extract_urls(text):
27
+ url_pattern = r'(https?://[^\s]+|www\.[^\s]+)'
28
+ return re.findall(url_pattern, text)
29
+
30
+ # ---------------- UI START ----------------
31
+ st.set_page_config(page_title="📩 Email Malicious Detector", layout="wide")
32
+ st.markdown("<h1 style='text-align: center;'>📩 Malicious Email Detection App</h1>", unsafe_allow_html=True)
33
+
34
+ st.markdown("### ✉️ Enter your email content:")
35
+ email_text = st.text_area("Paste your email content here:", height=200)
36
+
37
+ if st.button("🚨 Scan Email & Analyze URL"):
38
+ if not email_text.strip():
39
+ st.warning("⚠️ Please input some email content.")
40
+ else:
41
+ result1 = pipe1(email_text)[0]
42
+ label1, score1 = result1['label'], result1['score']
43
+ pred1 = normalize_label(label1)
44
+
45
+ if pred1 == "benign":
46
+ st.success(f"✅ BENIGN EMAIL CONTENT (Confidence Score: {score1:.2%})")
47
+ else:
48
+ urls = extract_urls(email_text)
49
+ if not urls:
50
+ st.warning("⚠️ Email content is malicious, but no URL found for further analysis.")
51
+ else:
52
+ url = urls[0]
53
+ result2 = pipe2(url)[0]
54
+ result3 = pipe3(url)[0]
55
+ label2, score2 = result2['label'], result2['score']
56
+ label3, score3 = result3['label'], result3['score']
57
+
58
+ final_label, final_score = calculate_weighted_prediction(label2, score2, label3, score3)
59
+
60
+ st.markdown("## 🛡️ **Prediction Result:**")
61
+ if final_score < 0.6:
62
+ st.warning(f"🤔 URLs in email content are UNCERTAIN - Confidence too low ({final_score:.2%}). Please review manually.")
63
+ elif final_label == "benign":
64
+ st.success(f"✅ URLs in email content are BENIGN (Confidence Score: {final_score:.2%})")
65
+ else:
66
+ st.error(f"⚠️ URLs in email content are MALICIOUS (Confidence Score: {final_score:.2%})")