|
from sklearn.feature_extraction.text import CountVectorizer |
|
from sklearn.model_selection import train_test_split |
|
from sklearn.naive_bayes import MultinomialNB |
|
from sklearn.metrics import accuracy_score, classification_report |
|
from pandas import * |
|
import pickle |
|
import sys |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the pickled classifier; written by train() and read by classify().
filename = 'classifier.bin'
|
|
|
|
|
def train():
    """Train a Multinomial Naive Bayes text classifier and persist it.

    Reads ``train.csv`` (using its ``text`` and ``label`` columns), turns the
    text into token-count features, fits the classifier on an 80% split and
    prints the accuracy and classification report measured on the remaining
    20%. The fitted classifier is pickled to ``filename`` and the fitted
    vectorizer to ``vectorizer.pkl`` so that ``classify`` can reload both.

    Returns:
        None. Results are printed and the artifacts are written to disk.
    """
    data = read_csv("train.csv")
    text = data['text'].tolist()
    label = data['label'].tolist()

    # NOTE(review): the vocabulary is fitted on the full dataset before the
    # split, so the held-out rows contribute to the feature space. This is
    # kept as-is to preserve the persisted vectorizer; confirm it is intended.
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(text)

    # Fixed random_state keeps the split (and the reported scores) repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, label, test_size=0.2, random_state=42
    )

    classifier = MultinomialNB()
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f"Accuracy: {accuracy}")
    print("Classification Report:\n", report)

    # Context managers guarantee the files are flushed and closed even if
    # pickling raises part-way through.
    with open(filename, 'wb') as model_file:
        pickle.dump(classifier, model_file)
    with open('vectorizer.pkl', 'wb') as vectorizer_file:
        pickle.dump(vectorizer, vectorizer_file)
|
|
|
def classify(text):
    """Predict the label of a single piece of text with the persisted model.

    Loads the classifier from ``filename`` and the vectorizer from
    ``vectorizer.pkl``, vectorizes ``text`` and returns the predicted label.

    Args:
        text: The document to classify. It is wrapped in a one-element list
            because the vectorizer transforms a collection of documents.

    Returns:
        The first (and only) element of the classifier's prediction.

    Raises:
        FileNotFoundError: If either pickle file does not exist.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing.
    # Only load model files that this program (or a trusted source) produced.
    with open(filename, 'rb') as model_file:
        classifier = pickle.load(model_file)
    with open('vectorizer.pkl', 'rb') as vectorizer_file:
        vectorizer = pickle.load(vectorizer_file)

    new_text_features = vectorizer.transform([text])
    predicted_label = classifier.predict(new_text_features)
    return predicted_label[0]
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: always retrain first, then optionally classify a
    # fixed sample sentence when the user answers "Y" (case-insensitive).
    train()
    classification = input("would you like to classify(Y/n)?\n")
    # Strip surrounding whitespace so an answer such as "y " is still accepted.
    if classification.strip().upper() == "Y":
        print(classify("I like turtles, do you?"))
|
|