Spaces:
Runtime error
Runtime error
import pandas as pd | |
import numpy as np | |
import pickle | |
from sklearn.feature_extraction.text import CountVectorizer | |
from sklearn.feature_extraction.text import TfidfTransformer | |
from sklearn.pipeline import Pipeline | |
from nltk.stem.snowball import SnowballStemmer | |
from sklearn.linear_model import SGDClassifier | |
from sklearn.naive_bayes import MultinomialNB | |
class Models:
    """Train and score text-classification pipelines on a single CSV dataset.

    The CSV must contain a ``sentences`` column (used as model input) and a
    ``sentiments`` column (used as the target). Rows with any missing value
    are dropped when the file is loaded.
    """

    # Dataset location used when the caller does not supply one.
    DEFAULT_DATASET_PATH = 'dataset/trainingdata.csv'

    def __init__(self, path=DEFAULT_DATASET_PATH):
        """Load the training data.

        Args:
            path: CSV file to read; defaults to ``DEFAULT_DATASET_PATH``.
        """
        # Human-readable label of the most recently built classifier; it is
        # also used as the pickle file stem and in the accuracy report.
        self.name = ''
        frame = pd.read_csv(path).dropna()
        self.x = frame['sentences']
        self.y = frame['sentiments']

    def _save(self, classifier):
        """Pickle *classifier* to ``<self.name>.pkl`` in the working directory."""
        # A context manager closes the handle even if pickling raises; the
        # previous bare ``open(...)`` left the file object unclosed.
        with open(self.name + '.pkl', 'wb') as handle:
            pickle.dump(classifier, handle)

    def mnb_classifier(self):
        """Fit and return a count -> tf-idf -> MultinomialNB pipeline.

        Unlike the other builders, this one does not write a pickle file.
        """
        self.name = 'MultinomialNB classifier'
        classifier = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', MultinomialNB()),
        ])
        return classifier.fit(self.x, self.y)

    def svm_classifier(self):
        """Fit, pickle and return a count -> tf-idf -> SGDClassifier pipeline.

        The SGD classifier uses hinge loss with an L2 penalty and a fixed
        ``random_state`` so repeated runs shuffle the data identically.
        """
        self.name = 'SVM classifier'
        classifier = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                      alpha=1e-3, random_state=42)),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def mnb_stemmed_classifier(self):
        """Fit, pickle and return a stemmed-count -> tf-idf -> MultinomialNB pipeline.

        The vectorizer is kept on ``self.stemmed_count_vect``.
        """
        self.name = 'MultinomialNB stemmed classifier'
        self.stemmed_count_vect = StemmedCountVectorizer(stop_words='english')
        classifier = Pipeline([
            ('vect', self.stemmed_count_vect),
            ('tfidf', TfidfTransformer()),
            ('mnb', MultinomialNB(fit_prior=False)),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def svm_stemmed_classifier(self):
        """Fit, pickle and return a stemmed-count -> tf-idf -> SGDClassifier pipeline.

        The SGD classifier is created with its default settings. The
        vectorizer is kept on ``self.stemmed_count_vect``.
        """
        self.name = 'SVM stemmed classifier'
        self.stemmed_count_vect = StemmedCountVectorizer(stop_words='english')
        classifier = Pipeline([
            ('vect', self.stemmed_count_vect),
            ('tfidf', TfidfTransformer()),
            ('clf-svm', SGDClassifier()),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def accuracy(self, model):
        """Print and return the accuracy of *model* on the loaded data.

        The score is computed on the same rows the builders train on, so it
        is a training-set accuracy rather than a held-out estimate.

        Args:
            model: Any object exposing ``predict(X)``.

        Returns:
            The fraction (0..1) of rows whose prediction equals the label.
        """
        predicted = model.predict(self.x)
        score = np.mean(predicted == self.y)
        print(f"{self.name} has accuracy of {score * 100} % ")
        return score
class StemmedCountVectorizer(CountVectorizer):
    """CountVectorizer variant that stems every token produced by its analyzer."""

    def build_analyzer(self):
        """Return a callable mapping a document to a list of stemmed tokens.

        The parent class's analyzer performs the tokenisation; each token it
        yields is then run through an English Snowball stemmer created with
        ``ignore_stopwords=True``.
        """
        snowball = SnowballStemmer("english", ignore_stopwords=True)
        base_analyzer = super().build_analyzer()

        def stemmed_analyzer(doc):
            return [snowball.stem(token) for token in base_analyzer(doc)]

        return stemmed_analyzer
if __name__ == '__main__':
    model = Models()
    # Each builder sets ``model.name`` before returning, so the report must be
    # printed immediately after the corresponding pipeline is trained.
    for build in (
        model.mnb_classifier,
        model.svm_classifier,
        model.mnb_stemmed_classifier,
        model.svm_stemmed_classifier,
    ):
        model.accuracy(build())