Noufy committed on
Commit
0ab7ba4
·
verified ·
1 Parent(s): 9f68ecc

Upload NavyBayes.py

Browse files
Files changed (1) hide show
  1. NavyBayes.py +57 -139
NavyBayes.py CHANGED
@@ -1,160 +1,78 @@
1
- import firebase_admin # type: ignore
2
- from firebase_admin import credentials, firestore # type: ignore
3
- from joblib import dump, load # type: ignore
 
 
 
4
  import datetime
5
  import re
6
- from sklearn.feature_extraction.text import TfidfVectorizer # type: ignore
7
- from sklearn.naive_bayes import MultinomialNB # type: ignore
8
- import pandas as pd # type: ignore
9
 
10
- # التهيئة مرة واحدة فقط
11
- if not firebase_admin._apps:
12
- # تأكد من وضع المسار الصحيح لملف التوثيق Firebase
13
- cred = credentials.Certificate("D:/app-sentinel-7qnr19-firebase-adminsdk-kjmbe-f38e16a432.json")
14
- firebase_admin.initialize_app(cred)
15
 
16
- db = firestore.client()
17
-
18
- # تحميل النموذج الحالي والمحول
19
  try:
20
- model = load('model.joblib')
21
- vectorizer = load('vectorizer.joblib')
22
- print("Model and vectorizer loaded successfully.")
 
 
 
 
23
  except Exception as e:
24
- model = None
25
- vectorizer = None
26
- print(f"Model and vectorizer not found. You need to train the model. Error: {e}")
27
 
28
- # 1. وظيفة لتحليل النصوص وتصنيفها
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def classify_and_store_message(message):
 
 
 
30
  global model, vectorizer
31
  try:
32
- if not model or not vectorizer:
33
- raise ValueError("Model or vectorizer not loaded. Train or load the model first.")
34
 
35
- # تحويل الرسالة إلى سمات رقمية
36
  message_vector = vectorizer.transform([message])
37
  classification = model.predict(message_vector)[0]
38
 
39
- # إعداد البيانات للتخزين
40
  message_data = {
41
- 'text': message,
42
- 'classification': classification,
43
- 'timestamp': datetime.datetime.now()
44
  }
45
 
46
- # تخزين الرسالة في مجموعة Firestore حسب التصنيف
47
- collection_name = classification.split('_')[0] # استخدام الجزء الأول من التصنيف كاسم المجموعة
48
- db.collection(collection_name).add(message_data)
49
-
50
- # تخزين الرسالة في مجموعة 'all_messages' لجميع الرسائل
51
- db.collection('all_messages').add(message_data)
52
-
53
- # تخزين الرسالة في مجموعة 'recently_analyzed_messages'
54
- db.collection('recently_analyzed_messages').add(message_data)
55
 
56
- print(f"Message classified as {classification} and stored in Firestore.")
57
  return classification
58
-
59
  except Exception as e:
60
- print(f"Error classifying message: {e}")
61
  return None
62
-
63
- # 2. وظيفة لتحليل النصوص المدخلة
64
- def analyze_input_text():
65
- print("\n--- SMS Classification and Link Analysis Tool ---")
66
- while True:
67
- user_input = input("Enter a message to classify (or type 'exit' to quit): ").strip()
68
- if user_input.lower() == 'exit':
69
- print("Exiting the tool. Goodbye!")
70
- break
71
-
72
- # استخراج الروابط من النص المدخل
73
- links = re.findall(r'(https?://[^\s]+)', user_input)
74
- if links:
75
- print(f"Detected links: {links}")
76
- # تحليل الروابط (يمكن تطوير التحليل ليشمل أدوات أو خدمات خارجية)
77
- for link in links:
78
- # افتراض تحليل بسيط (يمكن تحسينه لاحقًا)
79
- if "secure" in link or "safe" in link:
80
- print(f"Link '{link}' appears safe.")
81
- else:
82
- print(f"Link '{link}' might be suspicious.")
83
- else:
84
- print("No links detected in the message.")
85
-
86
- # تصنيف الرسالة
87
- classification = classify_and_store_message(user_input)
88
- if classification:
89
- print(f"Message classified as: {classification}")
90
- else:
91
- print("Unable to classify the message. Please try again.")
92
-
93
- # 3. دالة لتحديث النموذج مع بيانات جديدة
94
- def update_model_with_new_data(new_messages, new_labels):
95
- global model, vectorizer
96
- try:
97
- # تحميل البيانات الحالية
98
- data = {
99
- 'message': new_messages,
100
- 'label': new_labels
101
- }
102
- df_new = pd.DataFrame(data)
103
-
104
- # تحديث المحول والنموذج
105
- if vectorizer is None or model is None:
106
- vectorizer = TfidfVectorizer()
107
- X_new = vectorizer.fit_transform(df_new['message'])
108
- else:
109
- X_new = vectorizer.transform(df_new['message'])
110
-
111
- # جمع البيانات الجديدة مع القديمة وإعادة التدريب
112
- y_new = df_new['label']
113
- if model is None:
114
- model = MultinomialNB()
115
- model.partial_fit(X_new, y_new, classes=['spam_phishing', 'social_phishing', 'news_phishing', 'advertisement_phishing'])
116
-
117
- # حفظ النموذج الجديد
118
- dump(model, 'model.joblib')
119
- dump(vectorizer, 'vectorizer.joblib')
120
- print("Model updated and saved successfully.")
121
-
122
- except Exception as e:
123
- print(f"Error updating model: {e}")
124
-
125
- # 4. دالة لاختبار النظام
126
- def test_system():
127
- test_messages = [
128
- "Win a free vacation now! Visit https://spam-link.com",
129
- "Breaking news: Major stock updates today.",
130
- "Don't forget our meeting tomorrow at 10 AM.",
131
- "Click here to secure your bank account: https://phishing-link.com",
132
- "Exclusive offers just for you! Buy now at https://ad-link.com"
133
- ]
134
-
135
- for msg in test_messages:
136
- print(f"\nAnalyzing message: {msg}")
137
- analyze_input_text(msg)
138
-
139
- # 5. وظيفة للتصحيح اليدوي
140
- def correct_classification(message_id, correct_label):
141
- try:
142
- # جلب الرسالة من Firestore
143
- message_ref = db.collection('all_messages').document(message_id)
144
- message_data = message_ref.get().to_dict()
145
-
146
- if not message_data:
147
- print("Message not found.")
148
- return
149
-
150
- # تحديث التصنيف في Firestore
151
- message_ref.update({'classification': correct_label})
152
-
153
- # إضافة البيانات إلى نموذج التدريب الجديد
154
- update_model_with_new_data([message_data['text']], [correct_label])
155
- print(f"Message classification corrected to {correct_label} and model updated.")
156
- except Exception as e:
157
- print(f"Error correcting classification: {e}")
158
-
159
- # تشغيل تحليل النصوص
160
- analyze_input_text()
 
1
+ from sklearn.feature_extraction.text import TfidfVectorizer
2
+ from sklearn.naive_bayes import MultinomialNB
3
+ from joblib import dump, load
4
+ import firebase_admin
5
+ from firebase_admin import credentials, firestore
6
+ import logging
7
  import datetime
8
  import re
9
+ import pandas as pd
10
+ import os
 
11
 
12
# Logging setup: DEBUG level, timestamped records.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Firebase initialization.
# The service-account key defaults to the original local path; set the
# FIREBASE_CREDENTIALS_PATH environment variable to point somewhere else
# (for example when running on a machine without a D: drive).
cred_path = os.environ.get(
    "FIREBASE_CREDENTIALS_PATH",
    r"D:\app-sentinel-7qnr19-firebase-adminsdk-kjmbe-533749ec1a.json",
)
try:
    try:
        # get_app() raises ValueError when the default app does not exist
        # yet; this avoids reading the private firebase_admin._apps registry.
        firebase_admin.get_app()
    except ValueError:
        firebase_admin.initialize_app(credentials.Certificate(cred_path))
    db = firestore.client()
    logging.info("Firebase initialized successfully.")
except Exception as e:
    # Best-effort: the rest of the module checks for db being None and
    # skips storage instead of failing at import time.
    logging.error("Error initializing Firebase: %s", e)
    db = None
 
27
 
28
# Load or train model.
# Paths are computed before the try block so the except handler can always
# refer to them.
model_path = os.path.join(os.getcwd(), "model.joblib")
vectorizer_path = os.path.join(os.getcwd(), "vectorizer.joblib")
try:
    # NOTE(review): joblib.load unpickles the file contents; only load
    # artifacts that come from a trusted source.
    model = load(model_path)
    vectorizer = load(vectorizer_path)
    logging.info("Model and vectorizer loaded successfully.")
except Exception as e:
    logging.warning(
        "Model and vectorizer not found. Training new ones. Error: %s", e
    )
    # NOTE(review): the fallback is trained on two placeholder strings and
    # can only ever predict "label1"/"label2"; replace with real data.
    messages = ["example message 1", "example message 2"]
    labels = ["label1", "label2"]
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit_transform(messages)
    model = MultinomialNB()
    model.fit(X, labels)
    if os.path.exists(model_path) or os.path.exists(vectorizer_path):
        # The load failed for a reason other than missing files (corrupt
        # file, library version mismatch, ...). Keep the existing artifacts
        # rather than overwriting them with the placeholder model.
        logging.warning(
            "Existing model files were not overwritten; using an in-memory "
            "fallback model for this run."
        )
    else:
        dump(model, model_path)
        dump(vectorizer, vectorizer_path)
        logging.info("New model and vectorizer trained and saved.")
47
+
48
# Classify Message
def classify_and_store_message(message):
    """Classify a message and store the result in Firestore.

    The module-level ``vectorizer`` and ``model`` produce the label. When the
    module-level ``db`` client is available, a record with the text, label
    and timestamp is added to the ``all_messages`` collection. Storage is
    best-effort: a missing client or a failed write is logged and the label
    is still returned.

    Args:
        message: Text of the message to classify.

    Returns:
        The predicted label, or ``None`` when the message is empty or
        whitespace-only, is not a string, or classification fails.
    """
    try:
        if not message.strip():
            raise ValueError("Input message cannot be empty.")

        # Transform the message using the vectorizer, then predict.
        message_vector = vectorizer.transform([message])
        classification = model.predict(message_vector)[0]

        # Array-backed predictions yield scalar wrappers; unwrap them so the
        # stored and returned value is a built-in Python object.
        if hasattr(classification, "item"):
            classification = classification.item()
    except Exception as e:
        logging.error("Error in classification: %s", e)
        return None

    if db is None:
        logging.warning("Firestore is not initialized. Data not stored.")
        return classification

    # Timezone-aware UTC timestamp: a naive local time is ambiguous once it
    # leaves this machine.
    message_data = {
        "text": message,
        "classification": classification,
        "timestamp": datetime.datetime.now(datetime.timezone.utc),
    }

    try:
        db.collection("all_messages").add(message_data)
    except Exception as e:
        # A storage failure must not discard a valid classification.
        logging.error("Error storing message in Firestore: %s", e)
    else:
        logging.info("Message classified as %s and stored.", classification)

    return classification