File size: 2,807 Bytes
39f528e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from nltk.stem.snowball import SnowballStemmer
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB

class Models:
    """Train bag-of-words sentiment classifiers on a CSV dataset.

    The training data is loaded once in ``__init__``. Each ``*_classifier``
    method builds a scikit-learn ``Pipeline`` (vectorizer -> TF-IDF ->
    estimator), fits it on the loaded data, records a display name in
    ``self.name`` and returns the fitted pipeline.
    """

    def __init__(self, path='dataset/trainingdata.csv'):
        """Load the training data from *path*.

        Args:
            path: CSV file containing a ``sentences`` column (inputs) and a
                ``sentiments`` column (labels). Defaults to the location the
                script has always used, relative to the working directory.
        """
        self.name = ''
        df = pd.read_csv(path)
        # Rows with any missing value are discarded before training.
        df = df.dropna()
        self.x = df['sentences']
        self.y = df['sentiments']

    def _save(self, classifier):
        """Pickle *classifier* to ``<self.name>.pkl`` in the working directory."""
        # The context manager guarantees the handle is flushed and closed
        # even if pickling raises part-way through.
        with open(self.name + '.pkl', 'wb') as handle:
            pickle.dump(classifier, handle)

    def mnb_classifier(self):
        """Fit and return a MultinomialNB pipeline.

        Unlike the other trainers, this one does not write a pickle file.
        """
        self.name = 'MultinomialNB classifier'
        classifier = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf', MultinomialNB()),
        ])
        return classifier.fit(self.x, self.y)

    def svm_classifier(self):
        """Fit, pickle and return a hinge-loss SGDClassifier pipeline."""
        self.name = 'SVM classifier'
        classifier = Pipeline([
            ('vect', CountVectorizer()),
            ('tfidf', TfidfTransformer()),
            ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                      alpha=1e-3, random_state=42)),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def mnb_stemmed_classifier(self):
        """Fit, pickle and return a MultinomialNB pipeline on stemmed tokens."""
        self.name = 'MultinomialNB stemmed classifier'
        self.stemmed_count_vect = StemmedCountVectorizer(stop_words='english')
        classifier = Pipeline([
            ('vect', self.stemmed_count_vect),
            ('tfidf', TfidfTransformer()),
            ('mnb', MultinomialNB(fit_prior=False)),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def svm_stemmed_classifier(self):
        """Fit, pickle and return a default SGDClassifier pipeline on stemmed tokens."""
        self.name = 'SVM stemmed classifier'
        self.stemmed_count_vect = StemmedCountVectorizer(stop_words='english')
        classifier = Pipeline([
            ('vect', self.stemmed_count_vect),
            ('tfidf', TfidfTransformer()),
            ('clf-svm', SGDClassifier()),
        ])
        classifier = classifier.fit(self.x, self.y)
        self._save(classifier)
        return classifier

    def accuracy(self, model):
        """Print and return the accuracy of *model* on the loaded data.

        The score is computed on ``self.x`` / ``self.y``, i.e. the same rows
        the trainers fit on, so it is a training accuracy rather than a
        held-out estimate.

        Args:
            model: Any object exposing ``predict(X)``.

        Returns:
            The fraction of predictions equal to the labels (0.0 - 1.0).
        """
        predicted = model.predict(self.x)
        accuracy = np.mean(predicted == self.y)
        print(f"{self.name} has accuracy of {accuracy * 100} % ")
        return accuracy

class StemmedCountVectorizer(CountVectorizer):
    """CountVectorizer variant whose analyzer stems every token it emits."""

    def build_analyzer(self):
        """Return a callable mapping a document to its list of stemmed tokens.

        The parent class's analyzer runs first; each token it yields is then
        passed through an English Snowball stemmer created with
        ``ignore_stopwords=True``.
        """
        snowball = SnowballStemmer("english", ignore_stopwords=True)
        base_analyzer = super().build_analyzer()

        def stemmed_tokens(doc):
            # Materialise a list, matching what the parent analyzer's
            # consumers receive from this override.
            return [snowball.stem(token) for token in base_analyzer(doc)]

        return stemmed_tokens


if __name__ == '__main__':
    # Train every classifier variant in turn and report its accuracy.
    model = Models()
    trainers = (
        model.mnb_classifier,
        model.svm_classifier,
        model.mnb_stemmed_classifier,
        model.svm_stemmed_classifier,
    )
    for train in trainers:
        fitted = train()
        model.accuracy(fitted)