File size: 2,169 Bytes
3af5364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from pandas import *
import pickle
import sys

# Step 1: Data Preparation
# The training data is expected in 'train.csv', with a 'text' column holding
# the documents and a 'label' column holding their classes.
# train() loads that file into a pandas DataFrame and converts both columns to lists.

# Path of the pickled classifier: written by train() and read back by classify().
filename = 'classifier.bin'


def train():
    """Train a Naive Bayes text classifier from ``train.csv`` and save it.

    Reads the ``text`` and ``label`` columns of ``train.csv``, turns the text
    into token-count features with ``CountVectorizer``, holds out 20% of the
    rows for evaluation, fits a ``MultinomialNB`` model on the remaining rows,
    and prints the accuracy and classification report for the held-out rows.

    Side effects:
        Pickles the fitted classifier to ``filename`` and the fitted
        vectorizer to ``vectorizer.pkl`` in the current working directory,
        overwriting any existing files.
    """
    # Step 1: Load the data
    data = read_csv("train.csv")
    text = data['text'].tolist()
    label = data['label'].tolist()

    # Step 2: Text Preprocessing and Feature Extraction
    # NOTE(review): the vectorizer is fitted on all rows before the split, so
    # its vocabulary also covers the held-out rows. Kept as-is so the saved
    # vectorizer stays unchanged; confirm whether that is intended.
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(text)

    # Step 3: Split Data (fixed random_state keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, label, test_size=0.2, random_state=42)

    # Step 4: Create and Train the Classifier
    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    # Step 5: Evaluate the Classifier
    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print("Classification Report:\n", report)

    # Step 6: Save the model and vectorizer to disk. The context managers
    # guarantee the files are flushed and closed even if pickling fails.
    with open(filename, 'wb') as model_file:
        pickle.dump(classifier, model_file)
    with open('vectorizer.pkl', 'wb') as vectorizer_file:
        pickle.dump(vectorizer, vectorizer_file)

def classify(text):
    """Predict the label of one piece of text using the saved model.

    Loads the pickled classifier from ``filename`` and the pickled vectorizer
    from ``vectorizer.pkl`` on every call, so both files must already exist
    (they are written by ``train()``).

    Args:
        text: The text to classify.

    Returns:
        The first (and only) element of the classifier's prediction for
        ``text``.

    Raises:
        FileNotFoundError: If either pickle file is missing.
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that this script wrote itself; never point these paths at untrusted data.
    with open(filename, 'rb') as model_file:
        classifier = pickle.load(model_file)
    with open('vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    # Convert the text into numerical features using the same vectorizer that
    # was fitted during training; it expects an iterable of documents.
    new_text_features = vectorizer.transform([text])

    # Use the trained classifier to predict the label
    predicted_label = classifier.predict(new_text_features)
    return predicted_label[0]

if __name__ == '__main__':
    # Always retrain and save the model first, then optionally classify a
    # fixed sample sentence.
    train()
    answer = input("would you like to classify(Y/n)?\n")
    # Only a reply of "y" or "Y" triggers classification; any other reply
    # (including an empty one) ends the script without classifying.
    wants_classification = answer.upper() == "Y"
    if wants_classification:
        sample_label = classify("I like turtles, do you?")
        print(sample_label)