File size: 4,180 Bytes
640b1c8
 
 
aee2bfd
 
640b1c8
 
 
 
415595f
640b1c8
 
 
 
415595f
b953016
 
640b1c8
 
 
415595f
640b1c8
 
415595f
acdfaa9
 
c2e0ca0
 
 
640b1c8
c2e0ca0
 
 
 
3a56021
 
c2e0ca0
 
415595f
5bfdeda
 
 
 
 
 
 
 
 
 
82b8aa2
 
 
 
 
 
 
 
 
 
 
 
640b1c8
 
415595f
9700f95
 
415595f
aee2bfd
 
 
640b1c8
 
aee2bfd
 
415595f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2e372c4
 
3a56021
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# config/config.py
import os
from dotenv import load_dotenv
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow

# Load environment variables (python-dotenv; presumably from a local .env
# file -- confirm the lookup path for your deployment). This call must stay
# above the Settings class: the class body calls os.getenv() while the class
# is being defined, i.e. at import time.
load_dotenv()


class Settings:
    """Application configuration resolved from environment variables.

    Plain class attributes are evaluated exactly once, while the class body
    executes (at import time); later changes to the environment do not
    affect them. ``EMBEDDING_MODEL`` and ``MONGODB_URI`` are properties, so
    they re-read the environment on each access and must be read from an
    instance rather than from the class itself.
    """

    # Strings (compared case-insensitively) that switch a boolean flag on.
    _TRUE_VALUES = frozenset({'1', 'true', 'yes', 'on'})

    # OpenAI Configuration
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
    OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo')

    # Admin API key, overridable through the ADMIN_API_KEY variable.
    # `or` (rather than a getenv default) also covers a variable that is set
    # but empty, so the key can never silently become ''.
    # NOTE(review): the literal fallback is a secret embedded in source. It
    # is kept only so existing deployments keep working; rotate it and
    # supply the key through the environment instead.
    ADMIN_API_KEY = (
        os.getenv('ADMIN_API_KEY') or 'aca4081f-6ff2-434c-843b-98f60285c499'
    )

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2')

    # Anthropic Configuration
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')

    # Number of top-ranked chunks to retrieve.
    TOP_CHUNKS = int(os.getenv('TOP_CHUNKS', '10'))

    # Environment Configuration
    # Falls back to 'development' when the variable is unset or empty, so
    # importing this module no longer fails with AttributeError on None.
    # Normalised (stripped, lower-cased) for the == 'production' checks below.
    ENVIRONMENT = (os.getenv('ENVIRONMENT') or 'development').strip().lower()

    # Embedding Configuration
    @property
    def EMBEDDING_MODEL(self):
        """Return the embedding model name.

        Reads the ``EMBEDDING_MODEL`` variable and falls back to
        ``'all-MiniLM-L6-v2'``. The same fallback applies in every
        environment; 'openai/text-embedding-3-large' was noted as a
        better model for production demos but is not enabled.
        """
        return os.getenv('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')

    # MongoDB Configuration
    @property
    def MONGODB_URI(self):
        """Return the MongoDB connection URI for the current environment.

        The ``MONGODB_URI`` variable always takes precedence. Without it,
        production uses the hosted-cluster default and every other
        environment uses a MongoDB instance on localhost.
        """
        if self.ENVIRONMENT == 'production':
            # NOTE(review): this default embeds credentials in source; it is
            # left unchanged for backward compatibility. Prefer setting
            # MONGODB_URI in the production environment and rotating the
            # embedded password.
            return os.getenv('MONGODB_URI', 'mongodb+srv://talat:[email protected]/?retryWrites=true&w=majority&appName=Chatbot')
        # Local instance for development.
        return os.getenv('MONGODB_URI', 'mongodb://localhost:27017')

    # Conversation Summarizer Settings
    SUMMARIZER_CONFIG = {
        # 'facebook/bart-large-cnn' is the bigger and better alternative.
        'model_name': os.getenv('SUMMARIZER_MODEL', 'facebook/bart-base'),
        'max_length': int(os.getenv('SUMMARIZER_MAX_LENGTH', '130')),
        'min_length': int(os.getenv('SUMMARIZER_MIN_LENGTH', '30')),
        'device': -1,  # CPU
        'model_kwargs': {
            'low_cpu_mem_usage': True
        }
    }

    # Vector Store Configuration
    CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')

    # Feedback Configuration
    MAX_RATING = int(os.getenv('MAX_RATING', '5'))

    # Temporary directory for downloaded files
    TEMP_DOWNLOAD_DIR = os.getenv('TEMP_DOWNLOAD_DIR', './temp_downloads')

    # Application Configuration
    # Accepts 'True' as before, plus any-case 'true' and '1'/'yes'/'on'.
    DEBUG = os.getenv('DEBUG', 'False').strip().lower() in _TRUE_VALUES

    # Google Drive Configuration
    GOOGLE_DRIVE_FOLDER_ID = os.getenv('GOOGLE_DRIVE_FOLDER_ID', '')
    GOOGLE_SERVICE_ACCOUNT_PATH = os.getenv(
        'GOOGLE_SERVICE_ACCOUNT_PATH', 'service_account.json')

    # Raw document-processing values; int() keeps the numeric entries typed.
    # Use get_document_processor_settings() for the validated version.
    DOCUMENT_PROCESSOR = {
        'chunk_size': int(os.getenv('DOCUMENT_CHUNK_SIZE', '1000')),
        'chunk_overlap': int(os.getenv('DOCUMENT_CHUNK_OVERLAP', '200')),
        # 20MB in bytes
        'max_file_size': int(os.getenv('DOCUMENT_MAX_FILE_SIZE', str(20 * 1024 * 1024))),
        'supported_formats': [
            '.txt', '.pdf', '.docx', '.csv', '.json',
            '.html', '.md', '.xml', '.rtf', '.xlsx', '.xls'
        ]
    }

    @classmethod
    def get_document_processor_settings(cls) -> dict:
        """
        Get document processor settings with validation

        The numeric entries of ``DOCUMENT_PROCESSOR`` are clamped:
        ``chunk_size`` to at least 100, ``chunk_overlap`` to the range
        ``0 .. chunk_size - 50``, and ``max_file_size`` to at least 1MB.

        Returns:
            dict: Validated document processor settings. The dict and its
            ``supported_formats`` list are fresh copies, so callers may
            modify them without affecting the class-level defaults.
        """
        settings = dict(cls.DOCUMENT_PROCESSOR)
        # Copy the nested list as well; a shallow dict copy would share it.
        settings['supported_formats'] = list(settings['supported_formats'])

        # Minimum 100
        settings['chunk_size'] = max(100, settings['chunk_size'])
        # Keep the overlap below the chunk size, and never negative.
        settings['chunk_overlap'] = max(
            0,
            min(settings['chunk_overlap'], settings['chunk_size'] - 50),
        )
        # Minimum 1MB
        settings['max_file_size'] = max(
            1024 * 1024, settings['max_file_size'])

        return settings


# Module-level Settings instance. Reading EMBEDDING_MODEL and MONGODB_URI
# through an instance is what makes those properties resolve to values.
settings = Settings()

# Import-time side effect: prints the environment the settings resolved to.
print(f"Current Environment: {settings.ENVIRONMENT}")
# Left disabled: the production default URI contains credentials, so printing
# it would expose them in the output.
# print(f"Current MongoDB URI: {settings.MONGODB_URI}")