Spaces:
Running
Running
File size: 4,180 Bytes
640b1c8 aee2bfd 640b1c8 415595f 640b1c8 415595f b953016 640b1c8 415595f 640b1c8 415595f acdfaa9 c2e0ca0 640b1c8 c2e0ca0 3a56021 c2e0ca0 415595f 5bfdeda 82b8aa2 640b1c8 415595f 9700f95 415595f aee2bfd 640b1c8 aee2bfd 415595f 2e372c4 3a56021 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# config/config.py
import os
from dotenv import load_dotenv
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
# Load environment variables
load_dotenv()
class Settings:
    """Application configuration resolved from environment variables.

    Plain class attributes are read from the environment once, when the
    class body executes (i.e. when this module is imported).  The
    ``EMBEDDING_MODEL`` and ``MONGODB_URI`` properties re-read the
    environment on every access and therefore require an instance.
    """

    # OpenAI Configuration
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
    OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo')
    # SECURITY(review): this key used to be a hard-coded literal.  It can now
    # be supplied through the ADMIN_API_KEY environment variable; the literal
    # is kept only as a backward-compatible fallback and should be rotated
    # and removed from source control.
    ADMIN_API_KEY = os.getenv(
        'ADMIN_API_KEY', 'aca4081f-6ff2-434c-843b-98f60285c499')

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2')

    # Anthropic Configuration
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')

    # Top number of chunks to retrieve.
    TOP_CHUNKS = int(os.getenv('TOP_CHUNKS', '10'))

    # Environment Configuration
    # Falls back to 'development' when ENVIRONMENT is unset or empty; calling
    # .lower() directly on a missing variable raised AttributeError on import.
    ENVIRONMENT = (os.getenv('ENVIRONMENT') or 'development').strip().lower()

    # Embedding Configuration
    @property
    def EMBEDDING_MODEL(self) -> str:
        """Return the embedding model name.

        The EMBEDDING_MODEL environment variable wins; otherwise the default
        is 'all-MiniLM-L6-v2' for every environment.  A larger model such as
        'openai/text-embedding-3-large' can be selected through that variable.
        """
        return os.getenv('EMBEDDING_MODEL', 'all-MiniLM-L6-v2')

    # MongoDB Configuration
    @property
    def MONGODB_URI(self) -> str:
        """Return the MongoDB connection URI for the current environment.

        The MONGODB_URI environment variable always takes precedence.  When
        it is unset, production falls back to the hosted cluster URI and any
        other environment falls back to a local MongoDB instance.
        """
        if self.ENVIRONMENT == 'production':
            # SECURITY(review): this fallback embeds credentials in source;
            # prefer setting MONGODB_URI in the deployment environment.
            return os.getenv('MONGODB_URI', 'mongodb+srv://talat:[email protected]/?retryWrites=true&w=majority&appName=Chatbot')
        # Local instance for development purposes.
        return os.getenv('MONGODB_URI', 'mongodb://localhost:27017')

    # New Conversation Summarizer Settings
    SUMMARIZER_CONFIG = {
        # 'facebook/bart-large-cnn', for bigger and better model
        'model_name': os.getenv('SUMMARIZER_MODEL', 'facebook/bart-base'),
        'max_length': int(os.getenv('SUMMARIZER_MAX_LENGTH', '130')),
        'min_length': int(os.getenv('SUMMARIZER_MIN_LENGTH', '30')),
        'device': -1,  # CPU
        'model_kwargs': {
            'low_cpu_mem_usage': True
        }
    }

    # Vector Store Configuration
    CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')

    # Feedback Configuration
    MAX_RATING = int(os.getenv('MAX_RATING', '5'))

    # Temporary directory for downloaded files
    TEMP_DOWNLOAD_DIR = os.getenv('TEMP_DOWNLOAD_DIR', './temp_downloads')

    # Application Configuration
    # Accept the common truthy spellings case-insensitively; previously only
    # the exact string 'True' enabled debug mode.
    DEBUG = os.getenv('DEBUG', 'False').strip().lower() in (
        '1', 'true', 'yes', 'on')

    # Google Drive Configuration
    GOOGLE_DRIVE_FOLDER_ID = os.getenv('GOOGLE_DRIVE_FOLDER_ID', '')
    GOOGLE_SERVICE_ACCOUNT_PATH = os.getenv(
        'GOOGLE_SERVICE_ACCOUNT_PATH', 'service_account.json')

    # Use explicit type conversion to ensure correct types
    DOCUMENT_PROCESSOR = {
        'chunk_size': int(os.getenv('DOCUMENT_CHUNK_SIZE', '1000')),
        'chunk_overlap': int(os.getenv('DOCUMENT_CHUNK_OVERLAP', '200')),
        # 20MB in bytes
        'max_file_size': int(os.getenv('DOCUMENT_MAX_FILE_SIZE', str(20 * 1024 * 1024))),
        'supported_formats': [
            '.txt', '.pdf', '.docx', '.csv', '.json',
            '.html', '.md', '.xml', '.rtf', '.xlsx', '.xls'
        ]
    }

    @classmethod
    def get_document_processor_settings(cls) -> dict:
        """
        Get document processor settings with validation

        The numeric values are clamped as follows:
        - ``chunk_size`` is at least 100.
        - ``chunk_overlap`` is at least 0 and at most ``chunk_size - 50``.
        - ``max_file_size`` is at least 1MB.

        Returns:
            dict: Validated document processor settings. The dict and any
            list values in it are copies, so callers may mutate the result
            without affecting ``DOCUMENT_PROCESSOR``.
        """
        # Copy list values as well: a shallow dict copy would share the
        # 'supported_formats' list with the class-level defaults.
        settings = {
            key: list(value) if isinstance(value, list) else value
            for key, value in cls.DOCUMENT_PROCESSOR.items()
        }

        # Minimum chunk size of 100.
        settings['chunk_size'] = max(100, settings['chunk_size'])

        # Overlap must stay below the chunk size and can never be negative.
        # chunk_size >= 100 guarantees the upper bound is at least 50.
        settings['chunk_overlap'] = max(
            0,
            min(settings['chunk_overlap'], settings['chunk_size'] - 50),
        )

        # Minimum file size limit of 1MB.
        settings['max_file_size'] = max(
            1024 * 1024, settings['max_file_size'])
        return settings
# Module-level Settings instance, created once when this module is imported.
settings = Settings()
# Report the active environment on stdout at import time.
print(f"Current Environment: {settings.ENVIRONMENT}")
# NOTE(review): keep this disabled — the MongoDB URI may embed credentials
# and would be written to stdout.
# print(f"Current MongoDB URI: {settings.MONGODB_URI}")
|