|
""" |
|
Central configuration for the entire Document Intelligence app. |
|
All modules import from here rather than hard-coding values. |
|
""" |
|
import os |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
OPENAI_EMBEDDING_MODEL = os.getenv( |
|
"OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002" |
|
) |
|
class EmbeddingConfig:
    """Embedding settings, read from the environment at import time.

    All attributes are plain strings; an unset variable falls back to the
    default shown next to it.
    """

    # Embedding backend selector (EMBEDDING_PROVIDER); defaults to 'HF'.
    # NOTE(review): 'HF' presumably means Hugging Face -- confirm with the
    # code that consumes this value.
    PROVIDER = os.getenv("EMBEDDING_PROVIDER", "HF")

    # Model identifier taken from TEXT_EMBED_MODEL.
    TEXT_MODEL = os.getenv(
        "TEXT_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
    )

    # Model identifier taken from META_EMBED_MODEL; same default as
    # TEXT_MODEL.
    META_MODEL = os.getenv(
        "META_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
    )
|
|
|
|
|
|
|
class RetrieverConfig:
    """Retriever settings, read from the environment at import time.

    Integer settings are parsed with ``int()``; a non-integer value in the
    environment raises ``ValueError`` when this module is imported.
    """

    # Backend selector; reads the same EMBEDDING_PROVIDER variable (and the
    # same 'HF' default) as the embedding configuration.
    PROVIDER = os.getenv("EMBEDDING_PROVIDER", "HF")

    # RETRIEVER_TOP_K parsed as int, default 10.  The default is given as a
    # string so int() always receives the same type as a real env value.
    TOP_K = int(os.getenv("RETRIEVER_TOP_K", "10"))

    # Fixed model identifier; unlike the other settings this one is not
    # overridable from the environment.
    DENSE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

    # ANN_TOP parsed as int, default 50.
    # NOTE(review): presumably the candidate count fetched from the ANN
    # index before narrowing to TOP_K -- confirm against the retriever.
    ANN_TOP = int(os.getenv("ANN_TOP", "50"))
|
|
|
class RerankerConfig:
    """Reranker settings, read from the environment at import time."""

    # Model identifier taken from RERANKER_MODEL.
    # NOTE(review): the default is spelled with a capital 'G' in 'Gemma';
    # confirm it matches the published model id exactly.
    MODEL_NAME = os.getenv("RERANKER_MODEL", "BAAI/bge-reranker-v2-Gemma")

    # An explicit RERANKER_DEVICE always wins.  Otherwise choose 'cuda' only
    # when CUDA_VISIBLE_DEVICES names a usable first device: an unset or
    # empty value, or a leading '-1' (the conventional way to hide every
    # GPU), falls back to 'cpu'.
    DEVICE = os.getenv(
        "RERANKER_DEVICE",
        "cpu"
        if os.getenv("CUDA_VISIBLE_DEVICES", "").split(",")[0].strip()
        in ("", "-1")
        else "cuda",
    )
|
|
|
class GPPConfig:
    """GPP settings, read from the environment at import time.

    Values are parsed with ``int()`` / ``float()``; a malformed value in the
    environment raises ``ValueError`` when this module is imported.  Defaults
    are supplied as strings so the parser always receives the same type it
    would get from a real environment variable.
    """

    # CHUNK_TOKEN_SIZE parsed as int, default 256.
    CHUNK_TOKEN_SIZE = int(os.getenv("CHUNK_TOKEN_SIZE", "256"))

    # DEDUP_SIM_THRESHOLD parsed as float, default 0.9.
    DEDUP_SIM_THRESHOLD = float(os.getenv("DEDUP_SIM_THRESHOLD", "0.9"))

    # EXPANSION_SIM_THRESHOLD parsed as float, default 0.85.
    EXPANSION_SIM_THRESHOLD = float(
        os.getenv("EXPANSION_SIM_THRESHOLD", "0.85")
    )

    # COREF_CONTEXT_SIZE parsed as int, default 3.
    # NOTE(review): the unit (sentences, chunks, ...) is not visible here --
    # confirm against the code that consumes it.
    COREF_CONTEXT_SIZE = int(os.getenv("COREF_CONTEXT_SIZE", "3"))