# Spaces: xin / PatentSolver
# Build error
# File size: 2,121 Bytes
# 22738ca

from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer

class SharpClassifier(object):
    """Label a sentence as a problem, a partial solution, or neutral.

    The label is decided by comparing the ``'neg'`` and ``'pos'`` entries
    of the polarity scores computed for the sentence.
    """

    def __init__(self, sentence):
        """Store the sentence to classify and announce the run on stdout.

        Args:
            sentence: Text that ``classify`` will score.
        """
        self.sentence = sentence
        print("Classification....")

    def classify(self, analyzer=None):
        """Return the polarity label for ``self.sentence``.

        Only the polarity scores influence the result, so no subjectivity
        corpus is loaded and no Naive Bayes model is trained here: that
        work never contributed to the returned label.

        Args:
            analyzer: Optional object exposing ``polarity_scores(text)``
                that returns a mapping with ``'neg'`` and ``'pos'``
                entries. Pass a shared ``SentimentIntensityAnalyzer`` to
                avoid constructing a new one on every call. When omitted,
                a fresh ``SentimentIntensityAnalyzer`` is created.

        Returns:
            ``'partialSolution'`` when the positive score is larger,
            ``'problem'`` when the negative score is larger, and
            ``'neutre'`` when both scores are equal.
        """
        if analyzer is None:
            analyzer = SentimentIntensityAnalyzer()

        scores = analyzer.polarity_scores(self.sentence)
        negative, positive = scores['neg'], scores['pos']

        if negative < positive:
            return 'partialSolution'
        if negative > positive:
            return 'problem'
        # Equal scores: keep the original (French) label callers may match on.
        return 'neutre'