File size: 1,618 Bytes
3301b3c
 
 
 
 
 
04db7e0
 
 
 
 
3301b3c
04db7e0
 
 
3301b3c
04db7e0
3301b3c
 
04db7e0
 
3301b3c
 
04db7e0
33f4e34
 
04db7e0
33f4e34
3301b3c
 
 
 
 
 
 
 
 
04db7e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
Central configuration for the entire Document Intelligence app.
All modules import from here rather than hard-coding values.
"""
import os

# class RedisConfig:
#     HOST = os.getenv('REDIS_HOST', 'localhost')
#     PORT = int(os.getenv('REDIS_PORT', 6379))
#     DB = int(os.getenv('REDIS_DB', 0))
#     VECTOR_INDEX = os.getenv('REDIS_VECTOR_INDEX', 'gpp_vectors')

# Name of the OpenAI embedding model. Resolved once at import time from the
# OPENAI_EMBEDDING_MODEL environment variable, with a built-in fallback.
OPENAI_EMBEDDING_MODEL = os.environ.get("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
class EmbeddingConfig:
    """Embedding provider and model names, resolved from environment variables.

    Every value is read once, when this class body executes at import time;
    later changes to the environment are not picked up.
    """

    # Embedding backend identifier (env: EMBEDDING_PROVIDER, default 'HF').
    PROVIDER: str = os.environ.get("EMBEDDING_PROVIDER", "HF")

    # Model name for text embeddings (env: TEXT_EMBED_MODEL).
    TEXT_MODEL: str = os.environ.get(
        "TEXT_EMBED_MODEL",
        "sentence-transformers/all-MiniLM-L6-v2",
    )

    # Model name exposed as META_MODEL (env: META_EMBED_MODEL).
    META_MODEL: str = os.environ.get(
        "META_EMBED_MODEL",
        "sentence-transformers/all-MiniLM-L6-v2",
    )

    # Disabled alternative: point both models at the OpenAI embedding model.
    # TEXT_MODEL = OPENAI_EMBEDDING_MODEL
    # META_MODEL = OPENAI_EMBEDDING_MODEL

class RetrieverConfig:
    """Retriever settings, resolved from environment variables at import time.

    Each attribute is evaluated once when the class body runs; changing the
    environment afterwards has no effect on these values.
    """

    # Embedding backend identifier (env: EMBEDDING_PROVIDER, default 'HF').
    PROVIDER = os.getenv("EMBEDDING_PROVIDER", 'HF')

    # Number of candidates per retrieval path (env: RETRIEVER_TOP_K).
    TOP_K = int(os.getenv('RETRIEVER_TOP_K', '10'))

    # Dense retrieval model name. Previously hard-coded; it can now be
    # overridden through RETRIEVER_DENSE_MODEL like every other setting in
    # this module. The default is unchanged.
    DENSE_MODEL = os.getenv(
        'RETRIEVER_DENSE_MODEL', 'sentence-transformers/all-MiniLM-L6-v2'
    )
    # DENSE_MODEL = OPENAI_EMBEDDING_MODEL

    # Env: ANN_TOP. NOTE(review): presumably the candidate count requested
    # from the approximate-nearest-neighbour search -- confirm with the caller.
    ANN_TOP = int(os.getenv('ANN_TOP', '50'))

class RerankerConfig:
    """Reranker model and device, resolved from environment variables.

    Values are computed once, when the class body executes at import time.
    """

    # Reranker model name (env: RERANKER_MODEL).
    MODEL_NAME = os.getenv('RERANKER_MODEL', 'BAAI/bge-reranker-v2-Gemma')

    # Device fallback when RERANKER_DEVICE is not set: use 'cuda' only if
    # CUDA_VISIBLE_DEVICES actually exposes a device. A non-empty value is not
    # enough on its own: "-1" is the usual way to hide every GPU, and a
    # blank/whitespace value also exposes none, so only the first listed
    # entry is inspected and negative or empty entries mean 'cpu'.
    _first_visible = os.getenv('CUDA_VISIBLE_DEVICES', '').split(',', 1)[0].strip()
    DEVICE = os.getenv(
        'RERANKER_DEVICE',
        'cuda' if _first_visible and not _first_visible.startswith('-') else 'cpu',
    )
    # Drop the scratch name so the class exposes only its settings.
    del _first_visible

class GPPConfig:
    """Numeric tuning values for the GPP stage, read from the environment.

    Each setting is parsed once at import time from the environment variable
    of the same name, falling back to the default shown below.
    NOTE(review): the precise meaning of each value is defined by the
    consuming module, which is not visible here.
    """

    # Chunk size in tokens, per the setting's name (integer, default 256).
    CHUNK_TOKEN_SIZE: int = int(os.environ.get("CHUNK_TOKEN_SIZE", "256"))

    # Similarity threshold for de-duplication (float, default 0.9).
    DEDUP_SIM_THRESHOLD: float = float(
        os.environ.get("DEDUP_SIM_THRESHOLD", "0.9")
    )

    # Similarity threshold for expansion (float, default 0.85).
    EXPANSION_SIM_THRESHOLD: float = float(
        os.environ.get("EXPANSION_SIM_THRESHOLD", "0.85")
    )

    # Coreference context size (integer, default 3); units not shown here.
    COREF_CONTEXT_SIZE: int = int(os.environ.get("COREF_CONTEXT_SIZE", "3"))