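# NOTE: hosting-page residue captured with the file, not module code:
# avatar caption "muryshev's picture", commit message "update", revision 4a37130.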
import logging
from enum import Enum
def configure_logging(level=logging.INFO, config_file_path='./common.log'):
    """Configure the root logger to write to the console and to a log file.

    Args:
        level: Level applied to the root logger and to the console handler.
        config_file_path: Path of the log file, opened in append mode.
            Despite the parameter name, this is the log output file.

    Note:
        ``logging.basicConfig`` is a no-op when the root logger already has
        handlers, so calling this function again does not reconfigure logging.
    """
    log_format = (
        "[%(asctime)s.%(msecs)03d] %(module)30s:%(lineno)4d "
        "%(levelname)-7s - %(message)s"
    )
    date_format = "%Y-%m-%d %H:%M:%S"
    # The formatter needs the explicit date format: the default asctime
    # already ends in ",mmm", which would duplicate the "%(msecs)03d" field.
    formatter = logging.Formatter(log_format, datefmt=date_format)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)

    # basicConfig() raises ValueError when 'filename' is combined with
    # 'handlers', so the file target is passed as an explicit handler.
    # delay=True postpones opening the file until the first record, which
    # avoids a dangling open file if basicConfig() ends up being a no-op.
    file_handler = logging.FileHandler(config_file_path, mode="a", delay=True)
    file_handler.setFormatter(formatter)

    logging.basicConfig(level=level, handlers=[console_handler, file_handler])
def get_elastic_query(query):
    """Build a fuzzy ``multi_match`` request body over the ``text`` field.

    Args:
        query: Search text; it is rendered into a string via an f-string.

    Returns:
        dict: ``{"query": {"multi_match": {...}}}`` using ``AUTO`` fuzziness
        and the ``russian`` analyzer.
    """
    search_text = f"{query}"
    match_clause = dict(
        query=search_text,
        fields=["text"],
        fuzziness="AUTO",
        analyzer="russian",
    )
    return {"query": {"multi_match": match_clause}}
def get_elastic_people_query(query):
    """Build the people-search request body.

    The body is a ``bool``/``should`` query with four fuzzy ``multi_match``
    clauses: the person name (boost 3), and three ``nested`` clauses over
    business processes, organisational structure position (boost 2) and the
    business curator's company name. The curator boost is 30 when the
    lower-cased query contains one of the curator marker phrases, else 15.

    Args:
        query: Search text. Must support ``.lower()``.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score"`` (13.0).
    """
    lowered = query.lower()
    curator_markers = ("бизнес куратор", "бизнес-куратор", "куратор")
    if any(marker in lowered for marker in curator_markers):
        curator_boost = 30
    else:
        curator_boost = 15

    search_text = f"{query}"

    def fuzzy_match(fields):
        # One fuzzy multi_match clause over the given fields.
        return {
            "multi_match": {
                "query": search_text,
                "fields": fields,
                "fuzziness": "AUTO",
                "analyzer": "standard",
            }
        }

    def nested_match(path, fields):
        # The same clause wrapped for a nested object path.
        return {"nested": {"path": path, "query": fuzzy_match(fields)}}

    should_clauses = [
        fuzzy_match(["person_name^3"]),
        nested_match(
            "business_processes",
            [
                "business_processes.production_activities_section",
                "business_processes.processes_name",
            ],
        ),
        # NOTE(review): "organizatinal" is kept as-is; presumably it mirrors
        # the field name in the index mapping - confirm before renaming.
        nested_match(
            "organizatinal_structure",
            ["organizatinal_structure.position^2"],
        ),
        nested_match(
            "business_curator",
            [f"business_curator.company_name^{curator_boost}"],
        ),
    ]
    return {
        "query": {"bool": {"should": should_clauses}},
        "min_score": 13.0,
    }
def get_elastic_group_query(query):
    """Build the group-search request body over the ``group_name`` field.

    Two ``should`` clauses are combined: a fuzzy match of the user query,
    and a fixed-phrase match (boost 0.1, operator ``or``) on a hard-coded
    list of words.

    Args:
        query: Search text; it is rendered into a string via an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score"`` (7.5).
    """
    user_clause = {
        "multi_match": {
            "query": f"{query}",
            "fields": ["group_name"],
            "fuzziness": "AUTO",
            "analyzer": "standard",
        }
    }
    # NOTE(review): presumably a scoring adjustment for boilerplate words
    # that are common in group names - confirm intent.
    boilerplate_clause = {
        "multi_match": {
            "query": "персонального состава Персональный состав Комитета ПАО ГМК Норильский никель Рабочей группы",
            "fields": ["group_name"],
            "operator": "or",
            "boost": 0.1,
        }
    }
    body = {"query": {"bool": {"should": [user_clause, boilerplate_clause]}}}
    body["min_score"] = 7.5
    return body
def get_elastic_rocks_nn_query(query):
    """Build a ``function_score`` request body over division/company names.

    The inner query is a fuzzy ``multi_match`` on ``division_name``,
    ``division_name_2`` and ``company_name`` using ``custom_analyzer``.
    A single scoring function applies weight 0.5 (``boost_mode`` multiply)
    to the document whose ``_id`` is ``"3"``.

    Args:
        query: Search text; it is rendered into a string via an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score"`` (0.5).
    """
    base_query = {
        "multi_match": {
            "query": f"{query}",
            "fields": ["division_name", "division_name_2", "company_name"],
            "fuzziness": "AUTO",
            "analyzer": "custom_analyzer",
        }
    }
    # NOTE(review): why document "3" is down-weighted is not visible here.
    weight_functions = [{"filter": {"term": {"_id": "3"}}, "weight": 0.5}]
    scored = {
        "query": base_query,
        "functions": weight_functions,
        "boost_mode": "multiply",
    }
    return {"query": {"function_score": scored}, "min_score": 0.5}
def get_elastic_segmentation_query(query):
    """Build the segmentation-model search request body.

    Two ``should`` clauses are combined: a fuzzy match of the user query on
    ``segmentation_model``, ``segmentation_model2`` and ``company_name``
    (``russian`` analyzer), and a fixed-phrase match (boost 0.1, operator
    ``or``) on the two segmentation fields.

    Args:
        query: Search text; it is rendered into a string via an f-string.

    Returns:
        dict: Request body with ``"query"`` and ``"min_score"`` (1.0).
    """
    model_fields = ["segmentation_model", "segmentation_model2"]
    user_clause = {
        "multi_match": {
            "query": f"{query}",
            "fields": model_fields + ["company_name"],
            "fuzziness": "AUTO",
            "analyzer": "russian",
        }
    }
    fixed_phrase_clause = {
        "multi_match": {
            "query": "модели сегментации модель сегментации",
            "fields": list(model_fields),
            "operator": "or",
            "boost": 0.1,
        }
    }
    return {
        "query": {"bool": {"should": [user_clause, fixed_phrase_clause]}},
        "min_score": 1.0,
    }
def get_elastic_abbreviation_query(query):
    """Build a fuzzy ``multi_match`` request body for abbreviation lookup.

    Args:
        query: Search text; it is rendered into a string via an f-string.

    Returns:
        dict: ``{"query": {"multi_match": {...}}}`` over the ``text`` field
        with ``AUTO`` fuzziness and the ``russian`` analyzer.
    """
    match_clause = dict(
        query=f"{query}",
        fuzziness="AUTO",
        fields=["text"],
        analyzer="russian",
    )
    return {"query": {"multi_match": match_clause}}
def combine_answer(answer):
    """Group answer chunks by their source document.

    Args:
        answer: Mapping of search-type name to a mapping of chunk key to
            chunk dict. Every chunk dict must provide ``"doc_name"`` and
            ``"index_answer"``; ``"title"`` is read from the first chunk of
            each document. The ``"people_search"`` entry is skipped.

    Returns:
        list[dict]: One ``{"filename", "title", "chunks"}`` dict per
        document, in first-seen order. ``"chunks"`` holds the chunk dicts in
        encounter order, without repeats of the same
        ``(doc_name, index_answer)`` pair.

    Raises:
        KeyError: If a chunk lacks one of the required keys.
    """
    documents = {}
    # Assumes index_answer values are hashable (e.g. ints or strings).
    seen_chunks = set()
    for search_type, chunks in answer.items():
        if search_type == 'people_search':
            continue
        for chunk in chunks.values():
            doc_name = chunk["doc_name"]
            chunk_id = (doc_name, chunk['index_answer'])
            # A repeated chunk is skipped. Previously it replaced the whole
            # document entry, dropping the chunks collected so far.
            if chunk_id in seen_chunks:
                continue
            seen_chunks.add(chunk_id)
            if doc_name not in documents:
                documents[doc_name] = {
                    "filename": doc_name,
                    "title": chunk["title"],
                    "chunks": [],
                }
            documents[doc_name]["chunks"].append(chunk)
    return list(documents.values())
class TypeQuestion(Enum):
    """Question-type markers, each stored as a bracketed number string.

    What each type stands for is not defined in this file.
    """

    TYPE_ONE = "[1]"
    TYPE_TWO = "[2]"
    TYPE_THREE = "[3]"
def get_source_format(filename: str) -> str:
    """Return the file format taken from a file name, in upper case.

    The format is the text after the last ``.`` in ``filename``. When the
    name contains no dot, the whole name is returned upper-cased.
    """
    _, _, extension = filename.rpartition('.')
    return extension.upper()