Spaces:
Build error
Build error
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 28 16:02:26 2016
@author: Achille Souili
"""
import re | |
import nltk | |
class ComplexParser(object):
    """Pull clause-like chunks out of a sentence with an NLTK regexp chunker.

    The sentence passed to the constructor is tokenized, POS-tagged and
    chunked against a single ``CLAUSES`` rule when
    :meth:`extract_parameters` is called.
    """

    def __init__(self, sentence):
        """Keep the raw sentence text for later parsing."""
        self.sentence = sentence

    def extract_parameters(self):
        """Return the text of every ``CLAUSES`` chunk found in the sentence.

        Returns:
            A single string: the words of each matching chunk joined with
            spaces, and the chunks themselves joined with the literal
            separator ``"d"``. The string is empty when no chunk matches.
        """
        # Tag pattern, read left to right: optional <DT>, optional <JJ.*>,
        # a <DT> followed by <NN>, one optional token of any tag, an optional
        # <VB.*>, then one or more tokens of any tag.
        clause_grammar = """CLAUSES: {<DT>?<JJ.*>?<DT><NN><.*>?<VB.*>?<.*>+}
        """
        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(self.sentence))
        chunk_tree = nltk.RegexpParser(clause_grammar).parse(tagged_tokens)

        clause_texts = [
            " ".join(token for token, _pos in chunk.leaves())
            for chunk in chunk_tree.subtrees()
            if chunk.label() == 'CLAUSES'
        ]
        # NOTE(review): the "d" separator is kept exactly as found; it looks
        # like it may be a typo for another delimiter -- confirm with callers.
        return "d".join(clause_texts)
if __name__ == "__main__":
    # Manual smoke run: print the POS tags for a sample sentence, then the
    # string the parser extracts from that same sentence.
    sample_text = "in which the surface of diffusion (24) is concave."
    print(nltk.pos_tag(nltk.word_tokenize(sample_text)))

    demo_parser = ComplexParser(sample_text)
    print(demo_parser.extract_parameters())