File size: 2,729 Bytes
640b1c8
 
 
aee2bfd
 
640b1c8
 
 
 
415595f
640b1c8
 
 
 
415595f
b953016
 
640b1c8
 
 
415595f
640b1c8
 
415595f
640b1c8
 
415595f
640b1c8
 
415595f
e9d730a
 
415595f
9700f95
 
415595f
aee2bfd
 
 
640b1c8
 
aee2bfd
 
415595f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# config/config.py
import os
from dotenv import load_dotenv
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow

# Load environment variables via python-dotenv. This must run before the
# Settings class body below executes, because its attributes call os.getenv
# at class-definition time.
load_dotenv()


class Settings:
    """Central application configuration, read from environment variables.

    Every value is a class attribute evaluated once, when this class body
    runs at import time; later changes to ``os.environ`` are not picked up.
    Numeric settings are converted with ``int()``, so a non-numeric value in
    the environment raises ``ValueError`` at import.
    """

    # OpenAI Configuration
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
    OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo')

    # SECURITY: this key was previously only a hard-coded literal. It can now
    # be supplied through the ADMIN_API_KEY environment variable. The literal
    # is kept solely as a backward-compatible fallback; since it is committed
    # to source it should be considered exposed -- rotate it and set the
    # environment variable in every deployment.
    # `or` (rather than a getenv default) makes an empty ADMIN_API_KEY fall
    # back too, so the admin key can never silently become the empty string.
    ADMIN_API_KEY = (
        os.getenv('ADMIN_API_KEY') or 'aca4081f-6ff2-434c-843b-98f60285c499'
    )

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2')

    # Anthropic Configuration
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')

    # Embedding Configuration
    EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')

    # Vector Store Configuration
    CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')

    # MongoDB Configuration
    MONGODB_URI = os.getenv('MONGODB_URI', 'mongodb://localhost:27017')

    # Feedback Configuration
    MAX_RATING = int(os.getenv('MAX_RATING', '5'))

    # Temporary directory for downloaded files
    TEMP_DOWNLOAD_DIR = os.getenv('TEMP_DOWNLOAD_DIR', './temp_downloads')

    # Application Configuration
    # Accept the usual truthy spellings case-insensitively; the original
    # exact-match on 'True' silently ignored values such as 'true' or '1'.
    DEBUG = os.getenv('DEBUG', 'False').strip().lower() in (
        '1', 'true', 'yes', 'on')

    # Google Drive Configuration
    GOOGLE_DRIVE_FOLDER_ID = os.getenv('GOOGLE_DRIVE_FOLDER_ID', '')
    GOOGLE_SERVICE_ACCOUNT_PATH = os.getenv(
        'GOOGLE_SERVICE_ACCOUNT_PATH', 'service_account.json')

    # Raw (unvalidated) document processor settings. Use explicit type
    # conversion so the numeric entries are ints, not strings.
    DOCUMENT_PROCESSOR = {
        'chunk_size': int(os.getenv('DOCUMENT_CHUNK_SIZE', '1000')),
        'chunk_overlap': int(os.getenv('DOCUMENT_CHUNK_OVERLAP', '200')),
        # 20MB in bytes
        'max_file_size': int(
            os.getenv('DOCUMENT_MAX_FILE_SIZE', str(20 * 1024 * 1024))),
        'supported_formats': [
            '.txt', '.pdf', '.docx', '.csv', '.json',
            '.html', '.md', '.xml', '.rtf', '.xlsx', '.xls',
        ],
    }

    @classmethod
    def get_document_processor_settings(cls) -> dict:
        """
        Get document processor settings with validation

        The returned dict is independent of ``cls.DOCUMENT_PROCESSOR``:
        changing it (including its ``supported_formats`` list) does not
        affect the class-level configuration.

        Returns:
            dict: Validated document processor settings, where
                ``chunk_size`` is at least 100, ``chunk_overlap`` lies in
                ``[0, chunk_size - 50]`` and ``max_file_size`` is at least
                1MB.
        """
        validated = dict(cls.DOCUMENT_PROCESSOR)

        # dict() is a shallow copy; duplicate the list as well so callers
        # cannot mutate the shared class-level list through the result.
        formats = validated.get('supported_formats')
        if formats is not None:
            validated['supported_formats'] = list(formats)

        # Minimum chunk size of 100
        validated['chunk_size'] = max(100, validated['chunk_size'])

        # Keep the overlap at least 50 below the chunk size, and never
        # negative (chunk_size >= 100 guarantees the upper bound is >= 50).
        upper_bound = validated['chunk_size'] - 50
        validated['chunk_overlap'] = max(
            0, min(validated['chunk_overlap'], upper_bound))

        # Minimum 1MB
        validated['max_file_size'] = max(
            1024 * 1024, validated['max_file_size'])

        return validated


# Shared module-level Settings instance. All configuration values are class
# attributes, so reads on this instance resolve to the same values as reads
# on the Settings class itself.
settings = Settings()