# -*- coding: utf-8 -*-
"""
Created on Mon Nov 28 16:02:26 2016

@author: Achille Souili
"""
import re
import nltk



class ComplexParser(object):
    """Pull clause-like chunks out of a sentence with an NLTK regexp chunker."""

    def __init__(self, sentence):
        """Keep the raw sentence text; parsing happens in extract_parameters."""
        self.sentence = sentence

    def extract_parameters(self):
        """Return the text of every ``CLAUSES`` chunk found in the sentence.

        The sentence is tokenized and POS-tagged with NLTK, then chunked with
        a single ``CLAUSES`` tag pattern. The words of each matching chunk are
        joined with single spaces.

        Returns:
            str: the chunk texts concatenated with the literal separator
            ``"d"``; an empty string when no chunk matches.
        """
        # Tag pattern: optional determiner and adjective, then a determiner
        # followed by a singular noun, then whatever tagged tokens follow.
        chunk_grammar = """CLAUSES: {<DT>?<JJ.*>?<DT><NN><.*>?<VB.*>?<.*>+}
                              """

        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(self.sentence))
        chunk_tree = nltk.RegexpParser(chunk_grammar).parse(tagged_tokens)

        # subtrees() also yields the root node, so keep only CLAUSES chunks.
        clause_texts = [
            " ".join(token for token, _tag in node.leaves())
            for node in chunk_tree.subtrees()
            if node.label() == 'CLAUSES'
        ]

        # NOTE(review): the "d" separator looks unintended (a space or a
        # returned list seems more plausible) -- confirm before changing it.
        return "d".join(clause_texts)

if __name__ == "__main__":
    # Demo run: print the POS tags for a sample sentence, then the chunk text
    # that ComplexParser extracts from that same sentence.
    sample_text = "in which the surface of diffusion (24) is concave."

    print(nltk.pos_tag(nltk.word_tokenize(sample_text)))

    print(ComplexParser(sample_text).extract_parameters())