"""Application bootstrap executed at import time.

Reads the configuration file named by the ``CONFIG_PATH`` environment
variable (falling back to ``./config_dev.yaml``), configures logging,
constructs the embedding extractor, the dispatcher and the NMD services,
calls ``create_database``, resets the dispatcher to the current dataset,
loads that dataset's ``dataset.pkl`` and, when
``config.db_config.elastic.use_elastic`` is truthy, calls the Elasticsearch
index-creation helpers.

The module-level names defined here (``config``, ``logger``, ``model``,
``dispatcher``, ``acronym_service``, ``dataset_service``,
``document_service``, ``current_dataset``, ``df``) are kept as-is because
other modules may import them.
"""
import logging
import os
from pathlib import Path

import pandas as pd

from common.common import configure_logging
from common.configuration import Configuration
from components.elastic import create_index_elastic_chunks, create_index_elastic_people
from components.embedding_extraction import EmbeddingExtractor
from controlpanel.components.datasets.dispatcher import Dispatcher
from components.nmd.services.acronym import AcronymService
from components.nmd.services.dataset import DatasetService
from components.nmd.services.document import DocumentService
from components.sqlite.create_database import create_database

# --- Configuration and logging -------------------------------------------
CONFIG_PATH = os.environ.get('CONFIG_PATH', './config_dev.yaml')
config = Configuration(CONFIG_PATH)

logger = logging.getLogger(__name__)
configure_logging(config_file_path=config.common_config.log_file_path)

logger.info('Start work...')
logger.info(f'Use config: {os.path.abspath(CONFIG_PATH)}')

# --- Embedding model and services ----------------------------------------
model = EmbeddingExtractor(
    config.db_config.faiss.model_embedding_path,
    config.db_config.faiss.device,
)
dispatcher = Dispatcher(model, config, logger)

acronym_service = AcronymService()
dataset_service = DatasetService(model, dispatcher, config)
document_service = DocumentService(dataset_service, config)

# --- Database and current dataset ----------------------------------------
create_database(dataset_service, config)

current_dataset = dataset_service.get_current_dataset()
dispatcher.reset_dataset(current_dataset.dataset_id)

# SECURITY NOTE: unpickling can execute arbitrary code. The path is built
# from configuration and the dataset id; this is only safe as long as that
# directory is writable by trusted code only.
df = pd.read_pickle(
    Path.cwd()
    / config.db_config.files.regulations_path
    / f'{current_dataset.dataset_id}'
    / 'dataset.pkl'
)

# --- Optional Elasticsearch indices --------------------------------------
# NOTE(review): both index calls are assumed to be gated by `use_elastic`
# (they only make sense with Elasticsearch enabled) - confirm.
if config.db_config.elastic.use_elastic:
    create_index_elastic_chunks(df, logger)
    create_index_elastic_people(config.db_config.elastic.people_path, logger)

# NOTE(review): this message is emitted after all start-up steps, not right
# after the model is constructed - confirm the intended placement/wording.
logger.info('Loaded embedding model')