nouf12
/

NavyBayseAlogorthim

Text Classification

Model card Files Files and versions

xet

Community

Noufy committed on Dec 3, 2024

Commit

0ab7ba4

verified ·

1 Parent(s): 9f68ecc

Upload NavyBayes.py

Browse files

Files changed (1) hide show

NavyBayes.py +57 -139

NavyBayes.py CHANGED Viewed

@@ -1,160 +1,78 @@
-import firebase_admin # type: ignore
-from firebase_admin import credentials, firestore # type: ignore
-from joblib import dump, load # type: ignore
 import datetime
 import re
-from sklearn.feature_extraction.text import TfidfVectorizer # type: ignore
-from sklearn.naive_bayes import MultinomialNB # type: ignore
-import pandas as pd # type: ignore
-# Initialize only once
-if not firebase_admin._apps:
-    # Make sure to set the correct path to the Firebase credentials file
-    cred = credentials.Certificate("D:/app-sentinel-7qnr19-firebase-adminsdk-kjmbe-f38e16a432.json")
-    firebase_admin.initialize_app(cred)
-db = firestore.client()
-# Load the current model and vectorizer
 try:
-    model = load('model.joblib')
-    vectorizer = load('vectorizer.joblib')
-    print("Model and vectorizer loaded successfully.")
 except Exception as e:
-    model = None
-    vectorizer = None
-    print(f"Model and vectorizer not found. You need to train the model. Error: {e}")
-# 1. Function to analyze and classify text
 def classify_and_store_message(message):
     global model, vectorizer
     try:
-        if not model or not vectorizer:
-            raise ValueError("Model or vectorizer not loaded. Train or load the model first.")
-        # Convert the message into numeric features
         message_vector = vectorizer.transform([message])
         classification = model.predict(message_vector)[0]
-        # Prepare the data for storage
         message_data = {
-            'text': message,
-            'classification': classification,
-            'timestamp': datetime.datetime.now()
         }
-        # Store the message in a Firestore collection chosen by its classification
-        collection_name = classification.split('_')[0]  # Use the first part of the classification as the collection name
-        db.collection(collection_name).add(message_data)
-        # Store the message in the 'all_messages' collection, which holds every message
-        db.collection('all_messages').add(message_data)
-        # Store the message in the 'recently_analyzed_messages' collection
-        db.collection('recently_analyzed_messages').add(message_data)
-        print(f"Message classified as {classification} and stored in Firestore.")
         return classification
     except Exception as e:
-        print(f"Error classifying message: {e}")
         return None
-# 2. Function to analyze user-entered text
-def analyze_input_text():
-    print("\n--- SMS Classification and Link Analysis Tool ---")
-    while True:
-        user_input = input("Enter a message to classify (or type 'exit' to quit): ").strip()
-        if user_input.lower() == 'exit':
-            print("Exiting the tool. Goodbye!")
-            break
-        # Extract links from the entered text
-        links = re.findall(r'(https?://[^\s]+)', user_input)
-        if links:
-            print(f"Detected links: {links}")
-            # Analyze the links (this could be extended to use external tools or services)
-            for link in links:
-                # Assume a simple analysis (can be improved later)
-                if "secure" in link or "safe" in link:
-                    print(f"Link '{link}' appears safe.")
-                else:
-                    print(f"Link '{link}' might be suspicious.")
-        else:
-            print("No links detected in the message.")
-        # Classify the message
-        classification = classify_and_store_message(user_input)
-        if classification:
-            print(f"Message classified as: {classification}")
-        else:
-            print("Unable to classify the message. Please try again.")
-# 3. Function to update the model with new data
-def update_model_with_new_data(new_messages, new_labels):
-    global model, vectorizer
-    try:
-        # Load the current data
-        data = {
-            'message': new_messages,
-            'label': new_labels
-        }
-        df_new = pd.DataFrame(data)
-        # Update the vectorizer and the model
-        if vectorizer is None or model is None:
-            vectorizer = TfidfVectorizer()
-            X_new = vectorizer.fit_transform(df_new['message'])
-        else:
-            X_new = vectorizer.transform(df_new['message'])
-        # Combine the new data with the old and retrain
-        y_new = df_new['label']
-        if model is None:
-            model = MultinomialNB()
-        model.partial_fit(X_new, y_new, classes=['spam_phishing', 'social_phishing', 'news_phishing', 'advertisement_phishing'])
-        # Save the new model
-        dump(model, 'model.joblib')
-        dump(vectorizer, 'vectorizer.joblib')
-        print("Model updated and saved successfully.")
-    except Exception as e:
-        print(f"Error updating model: {e}")
-# 4. Function to test the system
-def test_system():
-    test_messages = [
-        "Win a free vacation now! Visit https://spam-link.com",
-        "Breaking news: Major stock updates today.",
-        "Don't forget our meeting tomorrow at 10 AM.",
-        "Click here to secure your bank account: https://phishing-link.com",
-        "Exclusive offers just for you! Buy now at https://ad-link.com"
-    ]
-    for msg in test_messages:
-        print(f"\nAnalyzing message: {msg}")
-        analyze_input_text(msg)
-# 5. Function for manual correction
-def correct_classification(message_id, correct_label):
-    try:
-        # Fetch the message from Firestore
-        message_ref = db.collection('all_messages').document(message_id)
-        message_data = message_ref.get().to_dict()
-        if not message_data:
-            print("Message not found.")
-            return
-        # Update the classification in Firestore
-        message_ref.update({'classification': correct_label})
-        # Add the data to the new training model
-        update_model_with_new_data([message_data['text']], [correct_label])
-        print(f"Message classification corrected to {correct_label} and model updated.")
-    except Exception as e:
-        print(f"Error correcting classification: {e}")
-# Run the text analysis
-analyze_input_text()

+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.naive_bayes import MultinomialNB
+from joblib import dump, load
+import firebase_admin
+from firebase_admin import credentials, firestore
+import logging
 import datetime
 import re
+import pandas as pd
+import os
+# Configure logging
+logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
+# Firebase Initialization
 try:
+    # Use the path you provided
+    cred_path = r"D:\app-sentinel-7qnr19-firebase-adminsdk-kjmbe-533749ec1a.json"
+    if not firebase_admin._apps:
+        cred = credentials.Certificate(cred_path)
+        firebase_admin.initialize_app(cred)
+    db = firestore.client()
+    logging.info("Firebase initialized successfully.")
 except Exception as e:
+    logging.error(f"Error initializing Firebase: {e}")
+    db = None
+# Load or Train Model
+try:
+    model_path = os.path.join(os.getcwd(), "model.joblib")
+    vectorizer_path = os.path.join(os.getcwd(), "vectorizer.joblib")
+    model = load(model_path)
+    vectorizer = load(vectorizer_path)
+    logging.info("Model and vectorizer loaded successfully.")
+except Exception as e:
+    logging.warning(f"Model and vectorizer not found. Training new ones. Error: {e}")
+    # Train new model and vectorizer
+    messages = ["example message 1", "example message 2"]
+    labels = ["label1", "label2"]
+    vectorizer = TfidfVectorizer()
+    X = vectorizer.fit_transform(messages)
+    model = MultinomialNB()
+    model.fit(X, labels)
+    dump(model, model_path)
+    dump(vectorizer, vectorizer_path)
+    logging.info("New model and vectorizer trained and saved.")
+# Classify Message
 def classify_and_store_message(message):
+    """
+    Classify a message and store the result in Firestore.
+    """
     global model, vectorizer
     try:
+        if not message.strip():
+            raise ValueError("Input message cannot be empty.")
+        # Transform the message using the vectorizer
         message_vector = vectorizer.transform([message])
         classification = model.predict(message_vector)[0]
+        # Prepare data for Firestore
         message_data = {
+            "text": message,
+            "classification": classification,
+            "timestamp": datetime.datetime.now(),
         }
+        if db:
+            db.collection("all_messages").add(message_data)
+            logging.info(f"Message classified as {classification} and stored.")
+        else:
+            logging.warning("Firestore is not initialized. Data not stored.")
         return classification
     except Exception as e:
+        logging.error(f"Error in classification: {e}")
         return None