|
import os |
|
|
|
import faiss |
|
import pandas as pd |
|
from openai import AsyncOpenAI |
|
import pathlib |
|
from functools import lru_cache |
|
from environs import Env |
|
from transformers import AutoModel, AutoTokenizer |
|
import torch |
|
|
|
# Load variables from a .env file (if present) into the process environment
# so they are visible to both `env(...)` lookups and os.getenv below.
env = Env()

env.read_env()
|
|
|
|
|
class BaseConfig:
    """Shared configuration: filesystem paths, embedding model, OpenAI client
    and FAISS index.

    NOTE(review): every attribute below is evaluated at class-definition
    (i.e. module-import) time, so importing this module loads the
    transformer model and reads the FAISS index file as a side effect.
    """

    # Repository root: two levels up from this config file.
    BASE_DIR: pathlib.Path = pathlib.Path(__file__).parent.parent

    DATA_DIR: pathlib.Path = BASE_DIR / 'project' / 'data'

    # Multilingual sentence-embedding model used to embed queries/chunks.
    MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'

    INFO_MODEL = AutoModel.from_pretrained(MODEL_NAME)

    INFO_TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)

    # NOTE(review): reads the key via os.getenv while the rest of the file
    # uses `env` (environs); both read the same process environment, but
    # consider unifying on one mechanism.
    OPENAI_CLIENT = AsyncOpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    # Prebuilt vector index; presumably row-aligned with chunks_javea.csv
    # loaded in ProductionConfig — TODO confirm.
    FAISS_INDEX = faiss.read_index(str(BASE_DIR / 'faiss_javea.index'))
|
|
|
|
|
class DevelopmentConfig(BaseConfig):
    """Development configuration; inherits all defaults from BaseConfig."""

    pass
|
|
|
|
|
class ProductionConfig(BaseConfig):
    """Production configuration: async Postgres DSN, LLM system prompts,
    and per-instance runtime state (device placement, chunk dataset).

    Instantiating this class moves the shared embedding model onto the
    best available torch device and loads the pre-chunked CSV dataset.
    """

    # SQLAlchemy async DSN assembled from environment variables. Evaluated
    # at class-definition time, so all DATABASE_* variables must be set
    # before this module is imported with FASTAPI_CONFIG=production.
    DATABASE_URL = (
        f"postgresql+asyncpg://{env('DATABASE_USER')}:"
        f"{env('DATABASE_PASSWORD')}@"
        f"{env('DATABASE_HOST')}:"
        f"{env('DATABASE_PORT')}/"
        f"{env('DATABASE_NAME')}"
    )

    # System prompt used when retrieved context is available.
    # Fix: Javea is in Spain (Costa Blanca), not Italy — EMPTY_PROMPT below
    # already said so; the two prompts now agree.
    PROMPT = (
        "Je bent een expert in de regio Javea in Spanje, die alles weet om mensen te helpen die migreren van "
        "Nederland naar Spanje. Je taak is om mensen te helpen zich te vestigen in de nieuwe stad. Gebruik "
        "kennis uit je vorige antwoord (voornamelijk uit opmerkingen) om een informatief antwoord "
        "te geven op de gebruikersvraag. Vermeld nooit dat je kennis haalt uit posts of opmerkingen. Spreek "
        "vanuit jezelf."
    )

    # Fallback system prompt used when no retrieved context exists.
    # Fix: "kennis ... maar informatieve antwoorden te geven" was garbled
    # Dutch; the correct construction is "om ... te geven".
    EMPTY_PROMPT = (
        "Je bent een expert in Javea aan de Costa Blanca in Spanje, met uitgebreide kennis om Nederlanders "
        "te helpen die naar deze regio verhuizen. Je taak is om mensen te helpen zich thuis te voelen in "
        "hun nieuwe stad. Gebruik je kennis over deze regio om informatieve antwoorden te "
        "geven op de vragen van gebruikers."
    )

    # Prompt that condenses a multi-turn conversation into one search query.
    SUMMARIZE_PROMPT = (
        "Study the user's requests, paying special attention to the specific mentioned wishes when "
        "choosing a house. Combine these details into a single query that reflects all the user's "
        "needs. Formulate your answer as if you were a user, clearly and concisely stating the "
        "requirements. Make sure that all relevant user wishes are indicated in your response. "
    )

    def __init__(self):
        # Prefer GPU when available, otherwise fall back to CPU.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device
        # Moves the class-level (shared) model in place onto the device.
        self.INFO_MODEL.to(device)
        # Pre-chunked knowledge-base texts; presumably row-aligned with
        # BaseConfig.FAISS_INDEX — TODO confirm against index builder.
        self.products_dataset = pd.read_csv(self.BASE_DIR / 'chunks_javea.csv')
|
|
|
|
|
class TestConfig(BaseConfig):
    """Testing configuration; inherits all defaults from BaseConfig."""

    pass
|
|
|
|
|
@lru_cache()
def get_settings() -> DevelopmentConfig | ProductionConfig | TestConfig:
    """Build and return the config instance selected by FASTAPI_CONFIG.

    The result is memoized by lru_cache, so every caller shares a single
    settings instance. An unrecognized FASTAPI_CONFIG value raises KeyError.
    """
    registry = {
        'development': DevelopmentConfig,
        'production': ProductionConfig,
        'testing': TestConfig,
    }
    selected = env('FASTAPI_CONFIG', default='development')
    return registry[selected]()
|
|
|
|
|
# Module-level singleton: importing this module resolves and instantiates
# the active configuration once.
settings = get_settings()
|
|