File size: 9,268 Bytes
57cf043
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
"""This module includes classes to define configurations."""

from typing import Any, Dict, List, Optional

from pyaml_env import parse_config
from pydantic import BaseModel


class Query(BaseModel):
    """Pydantic model describing a query together with abbreviation-related data.

    NOTE(review): the meaning of each field is inferred from its name only;
    confirm against the code that builds and consumes this model.
    """

    # Required string field.
    query: str
    # Required string field; presumably the query text after abbreviation
    # processing — TODO confirm.
    query_abbreviation: str
    # Optional, untyped list; defaults to None.
    abbreviations_replaced: Optional[List] = None
    # Optional string, defaults to None. The name is camelCase unlike the
    # other fields; renaming it would change the model's schema.
    userName: Optional[str] = None


class SemanticChunk(BaseModel):
    """Pydantic model for a single chunk of a document.

    All fields are required. NOTE(review): field meanings are inferred from
    their names; confirm against the producer of these objects.
    """

    index_answer: int
    doc_name: str
    title: str
    text_answer: str
    # doc_number: str  # TODO: later rename this variable to doc_id everywhere it is used
    # Untyped list; the element type is not specified in this file.
    other_info: List
    start_index_paragraph: int


class FilterChunks(BaseModel):
    """Pydantic model grouping a list of ``SemanticChunk`` items under one record.

    All fields are required. NOTE(review): presumably one instance per source
    document (id / filename / title) — confirm against callers.
    """

    id: str
    filename: str
    title: str
    # The chunks attached to this record.
    chunks: List[SemanticChunk]


class BusinessProcess(BaseModel):
    """Pydantic model with three nullable string fields describing a business process.

    NOTE(review): the fields have no explicit default. Pydantic v1 treats a
    bare ``Optional[...]`` as defaulting to None, while v2 makes it
    required-but-nullable — confirm which version the project pins.
    """

    production_activities_section: Optional[str]
    processes_name: Optional[str]
    level_process: Optional[str]


class Lead(BaseModel):
    """Pydantic model with two nullable string fields, ``person`` and ``leads``.

    NOTE(review): presumably "who leads what/whom"; the exact meaning of
    ``leads`` is not visible in this file. The fields have no explicit
    default — under pydantic v1 they default to None, under v2 they are
    required-but-nullable; confirm the pinned version.
    """

    person: Optional[str]
    leads: Optional[str]


class Subordinate(BaseModel):
    """Pydantic model with a nullable person name and a nullable position.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    person_name: Optional[str]
    position: Optional[str]


class OrganizationalStructure(BaseModel):
    """Pydantic model combining a position with ``Lead`` and ``Subordinate`` data.

    Every field is optional and defaults to None.
    """

    position: Optional[str] = None
    # A list of Lead models.
    leads: Optional[List[Lead]] = None
    # NOTE(review): a single Subordinate, not a list, despite the plural
    # name and unlike ``leads`` — confirm this is intended.
    subordinates: Optional[Subordinate] = None


class RocksNN(BaseModel):
    """Pydantic model pairing a nullable ``division`` with a nullable ``company_name``.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    division: Optional[str]
    company_name: Optional[str]


class RocksNNSearch(BaseModel):
    """Pydantic model pairing a nullable ``division`` with a nullable list of company names.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    division: Optional[str]
    # Untyped list here, whereas RocksNN.company_name is a single string.
    company_name: Optional[List]


class SegmentationSearch(BaseModel):
    """Pydantic model pairing a nullable ``segmentation_model`` with a nullable list of company names.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    segmentation_model: Optional[str]
    # Untyped list; the element type is not specified in this file.
    company_name: Optional[List]


class Group(BaseModel):
    """Pydantic model with three nullable string fields describing a group entry.

    NOTE(review): presumably a person's membership in a group; the meaning
    of ``block`` is not visible in this file. The fields have no explicit
    default — under pydantic v1 they default to None, under v2 they are
    required-but-nullable; confirm the pinned version.
    """

    group_name: Optional[str]
    position_in_group: Optional[str]
    block: Optional[str]


class GroupComposition(BaseModel):
    """Pydantic model with a nullable person name and a nullable position in a group.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    person_name: Optional[str]
    position_in_group: Optional[str]


class SearchGroupComposition(BaseModel):
    """Pydantic model pairing a nullable group name with a nullable list of ``GroupComposition``.

    NOTE(review): the fields have no explicit default — under pydantic v1
    they default to None, under v2 they are required-but-nullable; confirm
    the pinned version.
    """

    group_name: Optional[str]
    group_composition: Optional[List[GroupComposition]]


class PeopleChunks(BaseModel):
    """Pydantic model collecting the per-person data defined by the models above.

    ``person_name`` is the only required field; every other field is
    optional and defaults to None.
    """

    business_processes: Optional[List[BusinessProcess]] = None
    # NOTE(review): the field name is spelled 'organizatinal_structure' (sic).
    # It is part of the model's schema, so renaming it would affect every
    # producer and consumer of this model.
    organizatinal_structure: Optional[List[OrganizationalStructure]] = None
    business_curator: Optional[List[RocksNN]] = None
    groups: Optional[List[Group]] = None
    person_name: str


class SummaryChunks(BaseModel):
    """Pydantic model aggregating the result models declared in this module.

    Every result field is optional and defaults to None.
    """

    doc_chunks: Optional[List[FilterChunks]] = None
    people_search: Optional[List[PeopleChunks]] = None
    # A single SearchGroupComposition / RocksNNSearch / SegmentationSearch,
    # not lists.
    groups_search: Optional[SearchGroupComposition] = None
    rocks_nn_search: Optional[RocksNNSearch] = None
    segmentation_search: Optional[SegmentationSearch] = None
    # Defaults to the literal string '[3]'; what the value encodes is not
    # visible in this file — TODO confirm with the code that sets it.
    query_type: str = '[3]'


class ElasticConfiguration:
    """Holds the values of the ``elastic`` configuration section.

    Attributes:
        es_host: ``config_data['es_host']`` converted with ``str``.
        es_port: ``config_data['es_port']`` converted with ``int``.
        use_elastic: ``config_data['use_elastic']`` converted with ``bool``.
        people_path: ``config_data['people_path']`` converted with ``str``.
    """

    def __init__(self, config_data):
        """Reads the section values and converts them to their target types.

        Args:
            config_data: A mapping providing the keys ``es_host``, ``es_port``,
                ``use_elastic`` and ``people_path``.
        """
        host = str(config_data['es_host'])
        port = int(config_data['es_port'])
        # NOTE(review): bool() of any non-empty string (including 'false') is
        # True; confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_elastic'])
        people = str(config_data['people_path'])

        self.es_host = host
        self.es_port = port
        self.use_elastic = enabled
        self.people_path = people


class FaissDataConfiguration:
    """Holds the values of the ``faiss`` configuration section.

    Attributes:
        model_embedding_path: ``config_data['model_embedding_path']`` as ``str``.
        device: ``config_data['device']`` as ``str``.
        path_to_metadata: ``config_data['path_to_metadata']`` as ``str``.
    """

    def __init__(self, config_data):
        """Reads the three string settings from the section.

        Args:
            config_data: A mapping providing the keys ``model_embedding_path``,
                ``device`` and ``path_to_metadata``.
        """
        (
            self.model_embedding_path,
            self.device,
            self.path_to_metadata,
        ) = (
            str(config_data['model_embedding_path']),
            str(config_data['device']),
            str(config_data['path_to_metadata']),
        )


class ChunksElasticSearchConfiguration:
    """Holds the values of the ``chunks_elastic_search`` configuration section.

    Attributes:
        use_chunks_search: ``config_data['use_chunks_search']`` as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_chunks_search``,
                ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_chunks_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_chunks_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class PeopleSearchConfiguration:
    """Holds the values of the ``people_elastic_search`` configuration section.

    Attributes:
        use_people_search: ``config_data['use_people_search']`` as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_people_search``,
                ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_people_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_people_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class VectorSearchConfiguration:
    """Holds the values of the ``vector_search`` configuration section.

    Attributes:
        use_vector_search: ``config_data['use_vector_search']`` as ``bool``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_vector_search``
                and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_vector_search'])
        neighbors = int(config_data['k_neighbors'])

        self.use_vector_search = enabled
        self.k_neighbors = neighbors


class GroupsSearchConfiguration:
    """Holds the values of the ``groups_elastic_search`` configuration section.

    Attributes:
        use_groups_search: ``config_data['use_groups_search']`` as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_groups_search``,
                ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_groups_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_groups_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class RocksNNSearchConfiguration:
    """Holds the values of the ``rocks_nn_elastic_search`` configuration section.

    Attributes:
        use_rocks_nn_search: ``config_data['use_rocks_nn_search']`` as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_rocks_nn_search``,
                ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_rocks_nn_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_rocks_nn_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class AbbreviationSearchConfiguration:
    """Holds the values of the ``abbreviation_search`` configuration section.

    Attributes:
        use_abbreviation_search: ``config_data['use_abbreviation_search']``
            as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys
                ``use_abbreviation_search``, ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_abbreviation_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_abbreviation_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class SegmentationSearchConfiguration:
    """Holds the values of the ``segmentation_elastic_search`` configuration section.

    Attributes:
        use_segmentation_search: ``config_data['use_segmentation_search']``
            as ``bool``.
        index_name: ``config_data['index_name']`` as ``str``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys
                ``use_segmentation_search``, ``index_name`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_segmentation_search'])
        index = str(config_data['index_name'])
        neighbors = int(config_data['k_neighbors'])

        self.use_segmentation_search = enabled
        self.index_name = index
        self.k_neighbors = neighbors


class SearchConfiguration:
    """Holds the ``search`` configuration section and its nested sub-sections.

    Each attribute has the same name as the key it is read from:
    ``vector_search``, ``people_elastic_search``, ``chunks_elastic_search``,
    ``groups_elastic_search``, ``rocks_nn_elastic_search``,
    ``segmentation_elastic_search``, ``stop_index_names`` (converted with
    ``list``) and ``abbreviation_search``.
    """

    def __init__(self, config_data):
        """Builds one configuration object per sub-section.

        Args:
            config_data: A mapping providing one entry per attribute listed in
                the class docstring.
        """
        # (key and attribute name, converter), in the order they are applied.
        builders = (
            ('vector_search', VectorSearchConfiguration),
            ('people_elastic_search', PeopleSearchConfiguration),
            ('chunks_elastic_search', ChunksElasticSearchConfiguration),
            ('groups_elastic_search', GroupsSearchConfiguration),
            ('rocks_nn_elastic_search', RocksNNSearchConfiguration),
            ('segmentation_elastic_search', SegmentationSearchConfiguration),
            ('stop_index_names', list),
            ('abbreviation_search', AbbreviationSearchConfiguration),
        )
        for name, build in builders:
            setattr(self, name, build(config_data[name]))


class FilesConfiguration:
    """Holds the values of the ``files`` configuration section.

    Attributes:
        empty_start: ``config_data['empty_start']`` as ``bool``.
        regulations_path: ``config_data['regulations_path']`` as ``str``.
        default_regulations_path: ``config_data['default_regulations_path']``
            as ``str``.
        documents_path: ``config_data['documents_path']`` as ``str``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``empty_start``,
                ``regulations_path``, ``default_regulations_path`` and
                ``documents_path``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        start_empty = bool(config_data['empty_start'])
        regulations = str(config_data['regulations_path'])
        default_regulations = str(config_data['default_regulations_path'])
        documents = str(config_data['documents_path'])

        self.empty_start = start_empty
        self.regulations_path = regulations
        self.default_regulations_path = default_regulations
        self.documents_path = documents


class RankingConfiguration:
    """Holds the ranking settings read from a configuration mapping.

    Attributes:
        use_ranging: ``config_data['use_ranging']`` as ``bool``.
        alpha: ``config_data['alpha']`` as ``float``.
        beta: ``config_data['beta']`` as ``float``.
        k_neighbors: ``config_data['k_neighbors']`` as ``int``.
    """

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``use_ranging``,
                ``alpha``, ``beta`` and ``k_neighbors``.
        """
        # NOTE(review): bool() of a non-empty string such as 'false' is True;
        # confirm the loader delivers real booleans for this key.
        enabled = bool(config_data['use_ranging'])
        alpha = float(config_data['alpha'])
        beta = float(config_data['beta'])
        neighbors = int(config_data['k_neighbors'])

        self.use_ranging = enabled
        self.alpha = alpha
        self.beta = beta
        self.k_neighbors = neighbors


class DataBaseConfiguration:
    """Groups the storage, search, files and ranking configuration objects.

    Attributes:
        elastic: ``ElasticConfiguration`` built from ``config_data['elastic']``.
        faiss: ``FaissDataConfiguration`` built from ``config_data['faiss']``.
        search: ``SearchConfiguration`` built from ``config_data['search']``.
        files: ``FilesConfiguration`` built from ``config_data['files']``.
        ranker: ``RankingConfiguration`` built from ``config_data['ranging']``
            (the attribute name differs from the key).
    """

    def __init__(self, config_data):
        """Builds every nested configuration object.

        Args:
            config_data: A mapping providing the sub-sections ``elastic``,
                ``faiss``, ``search``, ``files`` and ``ranging``.
        """
        elastic_section = config_data['elastic']
        self.elastic = ElasticConfiguration(elastic_section)

        faiss_section = config_data['faiss']
        self.faiss = FaissDataConfiguration(faiss_section)

        search_section = config_data['search']
        self.search = SearchConfiguration(search_section)

        files_section = config_data['files']
        self.files = FilesConfiguration(files_section)

        ranking_section = config_data['ranging']
        self.ranker = RankingConfiguration(ranking_section)


class LLMConfiguration:
    """Holds the values of the ``llm`` configuration section.

    Attributes:
        base_url: ``config_data['base_url']`` as ``str``, or None when the
            value is unset (see ``_optional_str``).
        api_key_env: ``config_data['api_key_env']`` as ``str``, or None when
            the value is unset (see ``_optional_str``).
        model: ``config_data['model']`` as ``str``.
        tokenizer: ``config_data['tokenizer_name']`` as ``str`` (the attribute
            name differs from the key).
        temperature: ``config_data['temperature']`` as ``float``.
        top_p: ``config_data['top_p']`` as ``float``.
        min_p: ``config_data['min_p']`` as ``float``.
        frequency_penalty: ``config_data['frequency_penalty']`` as ``float``.
        presence_penalty: ``config_data['presence_penalty']`` as ``float``.
        seed: ``config_data['seed']`` as ``int``.
    """

    # String spellings that mean "no value".
    _NULL_MARKERS = ("", "null", "None")

    def __init__(self, config_data):
        """Reads and converts the section values.

        Args:
            config_data: A mapping providing the keys ``base_url``,
                ``api_key_env``, ``model``, ``tokenizer_name``,
                ``temperature``, ``top_p``, ``min_p``, ``frequency_penalty``,
                ``presence_penalty`` and ``seed``.
        """
        self.base_url = self._optional_str(config_data['base_url'])
        self.api_key_env = self._optional_str(config_data['api_key_env'])
        self.model = str(config_data['model'])
        self.tokenizer = str(config_data['tokenizer_name'])
        self.temperature = float(config_data['temperature'])
        self.top_p = float(config_data['top_p'])
        self.min_p = float(config_data['min_p'])
        self.frequency_penalty = float(config_data['frequency_penalty'])
        self.presence_penalty = float(config_data['presence_penalty'])
        self.seed = int(config_data['seed'])

    @classmethod
    def _optional_str(cls, value):
        """Converts a raw config value to ``str``, mapping "unset" values to None.

        A real ``None`` (for example a YAML ``null`` or an empty value) is
        treated like the string markers. Previously it slipped past the
        marker check and was stored as the literal string ``'None'``.

        Args:
            value: The raw value read from the configuration mapping.

        Returns:
            None if the value is None or one of ``_NULL_MARKERS``, otherwise
            ``str(value)``.
        """
        if value is None or value in cls._NULL_MARKERS:
            return None
        return str(value)


class CommonConfiguration:
    """Holds the values of the ``common`` configuration section.

    Attributes:
        log_file_path: ``config_data['log_file_path']`` as ``str``.
        log_sql_path: ``config_data['log_sql_path']`` as ``str``.
    """

    def __init__(self, config_data):
        """Reads the two log path settings.

        Args:
            config_data: A mapping providing the keys ``log_file_path`` and
                ``log_sql_path``.
        """
        self.log_file_path, self.log_sql_path = (
            str(config_data['log_file_path']),
            str(config_data['log_sql_path']),
        )


class Configuration:
    """Top-level container for all configuration sections.

    After construction the instance exposes ``common_config``, ``db_config``
    and ``llm_config``.
    """

    def __init__(self, config_file_path: Optional[str] = None):
        """Loads the configuration from a file.

        A configuration file is the only supported source, so a path must be
        given even though the parameter has a default of None.

        Args:
            config_file_path: A path to the config file to load configuration
                data from.

        Raises:
            ValueError: If config_file_path is None.
        """
        if config_file_path is None:
            raise ValueError('At least one of config_path must be not None.')
        self._load_from_config(config_file_path)

    def _load_data(self, data: Dict[str, Any]):
        """Builds the section objects from a configuration dictionary.

        Args:
            data: A configuration dictionary with the top-level keys
                ``common``, ``bd`` and ``llm``.
        """
        common_section = data['common']
        self.common_config = CommonConfiguration(common_section)

        # The database section is stored under the key 'bd'.
        db_section = data['bd']
        self.db_config = DataBaseConfiguration(db_section)

        llm_section = data['llm']
        self.llm_config = LLMConfiguration(llm_section)

    def _load_from_config(self, config_file_path: str):
        """Parses the configuration file and loads the resulting data.

        Args:
            config_file_path: A path to the config file, passed to
                ``parse_config``.
        """
        self._load_data(parse_config(config_file_path))