Spaces:

xin
/

PatentSolver

Build error

File size: 1,901 Bytes

22738ca

# -*- coding: utf-8 -*-


import nltk
from App.bin import constants

class SentenceClassifier(object):
    def __init__(self, sentence):
        self.sentence = sentence
        print("Classification....")


    def classifySentence(self):

        sentence = self.sentence

        def bagOfWords(labelled):
            wordsList = []
            for (words, sentiment) in labelled:
                wordsList.extend(words)
            return wordsList

        def wordFeatures(wordList):
            wordList = nltk.FreqDist(wordList)
            wordFeatures = wordList.keys()
            return wordFeatures

        def extract_Features(doc):
            docWords = set(doc)
            feat = {}
            for word in wordFeatures:
                feat['contains(%s)' % word] = (word in docWords)
            return feat


        with open(constants.ASSETS+"trainingsNegative") as l:
            problems = [tuple(map(str, i.strip().split(':'))) for i in l]
        with open(constants.ASSETS+"trainingsPositive") as f:
            solutions = [tuple(map(str, i.strip().split(':'))) for i in f]

        labelled = []
        for (words, polarity) in solutions + problems:
            words_filtered = [e.lower() for e in nltk.word_tokenize(words) if len(e) >= 3]
            labelled.append((words_filtered, polarity))



        wordFeatures = wordFeatures(bagOfWords(labelled))

        training_set = nltk.classify.apply_features(extract_Features, labelled)

        classifier = nltk.NaiveBayesClassifier.train(training_set)

        #print(classifier.show_most_informative_features(32))


        #print (sentence)
        #print("{0} \n Polarity: {1} \n".format(sentence, classifier.classify(extract_Features(sentence.split()))))
        classes = classifier.classify(extract_Features(sentence.split()))
        return classes