File size: 977 Bytes
22738ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding: utf-8 -*-

#java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer --port 8080
import glob
import os


class InputHandler(object):
    """Collect paths under a folder tree that match a glob pattern.

    The constructor stores its two arguments unchanged:

    * ``folder_path`` -- root directory where the search starts.
    * ``extension`` -- glob pattern (for example ``'*.txt'``) that is joined
      onto every visited directory and passed to ``glob.glob``.
    """

    def __init__(self, folder_path, extension):
        """Store the search root and glob pattern and print a status line."""
        self.folder_path = folder_path
        self.extension = extension

        print("Handling Corpus...")

    def _get_dirs(self, base):
        """Return the directories located directly inside ``base``.

        Each returned path already starts with ``base`` because it comes from
        globbing ``os.path.join(base, '*')``. Entries that ``*`` does not
        match (names starting with a dot) are not returned.
        """
        return [
            entry
            for entry in glob.iglob(os.path.join(base, '*'))
            if os.path.isdir(entry)
        ]

    def get_base_file(self, base, pattern):
        """Return every path matching ``pattern`` in ``base`` and below.

        Matches found directly in ``base`` come first, followed by the
        matches of each sub-directory, visited depth-first in the order
        ``_get_dirs`` reports them.

        Args:
            base: Directory to search; may be relative or absolute.
            pattern: Glob pattern applied inside every visited directory.

        Returns:
            A list of matching paths, each prefixed with ``base``.
        """
        matches = glob.glob(os.path.join(base, pattern))
        for subdir in self._get_dirs(base):
            # ``subdir`` already contains ``base``. Joining it onto ``base``
            # again only works for absolute paths; for a relative ``base`` it
            # yields a non-existent ``base/base/...`` path and the recursion
            # silently finds nothing.
            matches.extend(self.get_base_file(subdir, pattern))
        return matches

    def get_input(self):
        """Return all paths under ``self.folder_path`` matching ``self.extension``."""
        return self.get_base_file(self.folder_path, self.extension)