Spaces:
Running
Running
File size: 4,267 Bytes
640b1c8 aee2bfd 640b1c8 415595f 640b1c8 415595f b953016 640b1c8 415595f 640b1c8 415595f acdfaa9 c2e0ca0 b08d8ce 415595f 5bfdeda 82b8aa2 640b1c8 415595f 9700f95 415595f aee2bfd 640b1c8 aee2bfd 415595f 2e372c4 3a56021 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# config/config.py
import os
from dotenv import load_dotenv
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
# Load environment variables
load_dotenv()
class Settings:
    """Application configuration resolved from environment variables.

    All class attributes are evaluated once, when the class body runs at
    import time. Each value falls back to the default shown next to it when
    the corresponding environment variable is not set.
    """

    # OpenAI Configuration
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
    OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo')
    # SECURITY: the fallback below is a credential committed to source. It is
    # kept only so existing deployments keep working; set ADMIN_API_KEY in the
    # environment and rotate this value.
    ADMIN_API_KEY = os.getenv(
        'ADMIN_API_KEY', 'aca4081f-6ff2-434c-843b-98f60285c499')

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2')

    # Anthropic Configuration
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')

    # Number of top-ranked chunks to retrieve.
    TOP_CHUNKS = int(os.getenv('TOP_CHUNKS', '10'))

    # Environment Configuration
    # Falls back to 'development' when ENVIRONMENT is unset or empty, instead
    # of failing on None.lower(). Any value other than 'production' is
    # treated as development by MONGODB_URI below.
    ENVIRONMENT = (os.getenv('ENVIRONMENT') or 'development').strip().lower()

    # Embedding Configuration - fixed model name, not read from the environment.
    EMBEDDING_MODEL = 'BAAI/bge-large-en-v1.5'

    # MongoDB Configuration
    @property
    def MONGODB_URI(self):
        """Return the MongoDB connection string for the current environment.

        The MONGODB_URI environment variable always wins when set. Otherwise
        the fallback depends on ENVIRONMENT: a hosted cluster URI for
        'production', a local instance for everything else.

        Returns:
            str: MongoDB connection URI.
        """
        if self.ENVIRONMENT == 'production':
            # NOTE(review): this fallback embeds a username and part of it
            # reads '[email protected]', which looks mangled - confirm the
            # real value and supply MONGODB_URI via the environment instead.
            return os.getenv(
                'MONGODB_URI',
                'mongodb+srv://talat:[email protected]/?retryWrites=true&w=majority&appName=Chatbot')
        # Non-production environments default to a local MongoDB instance.
        return os.getenv('MONGODB_URI', 'mongodb://localhost:27017')

    # Conversation Summarizer Settings
    SUMMARIZER_CONFIG = {
        # 'facebook/bart-large-cnn' is a bigger and better alternative.
        'model_name': os.getenv('SUMMARIZER_MODEL', 'facebook/bart-base'),
        'max_length': int(os.getenv('SUMMARIZER_MAX_LENGTH', '130')),
        'min_length': int(os.getenv('SUMMARIZER_MIN_LENGTH', '30')),
        'device': -1,  # CPU
        'model_kwargs': {
            'low_cpu_mem_usage': True
        }
    }

    # Vector Store Configuration
    CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')

    # Feedback Configuration
    MAX_RATING = int(os.getenv('MAX_RATING', '5'))

    # Temporary directory for downloaded files
    TEMP_DOWNLOAD_DIR = os.getenv('TEMP_DOWNLOAD_DIR', './temp_downloads')

    # Application Configuration
    # Accept the common truthy spellings case-insensitively; previously only
    # the exact string 'True' enabled debug mode.
    DEBUG = os.getenv('DEBUG', 'False').strip().lower() in (
        '1', 'true', 'yes', 'on')

    # Google Drive Configuration
    GOOGLE_DRIVE_FOLDER_ID = os.getenv('GOOGLE_DRIVE_FOLDER_ID', '')
    GOOGLE_SERVICE_ACCOUNT_PATH = os.getenv(
        'GOOGLE_SERVICE_ACCOUNT_PATH', 'service_account.json')

    # Document processing defaults; numeric values are converted explicitly
    # because environment variables are always strings.
    DOCUMENT_PROCESSOR = {
        'chunk_size': int(os.getenv('DOCUMENT_CHUNK_SIZE', '1000')),
        'chunk_overlap': int(os.getenv('DOCUMENT_CHUNK_OVERLAP', '200')),
        # 20MB in bytes
        'max_file_size': int(
            os.getenv('DOCUMENT_MAX_FILE_SIZE', str(20 * 1024 * 1024))),
        'supported_formats': [
            '.txt', '.pdf', '.docx', '.csv', '.json',
            '.html', '.md', '.xml', '.rtf', '.xlsx', '.xls'
        ]
    }

    @classmethod
    def get_document_processor_settings(cls) -> dict:
        """Return a validated copy of the document processor settings.

        The returned dict and its 'supported_formats' list are fresh objects,
        so callers may modify them without affecting DOCUMENT_PROCESSOR.

        Returns:
            dict: Settings with 'chunk_size' of at least 100, 'chunk_overlap'
            between 0 and chunk_size - 50, and 'max_file_size' of at least
            1MB.
        """
        settings = dict(cls.DOCUMENT_PROCESSOR)
        # dict() is a shallow copy; copy the list too so it is not shared.
        settings['supported_formats'] = list(settings['supported_formats'])

        # Minimum chunk size of 100.
        settings['chunk_size'] = max(100, settings['chunk_size'])

        # Overlap must be non-negative and leave at least 50 units of each
        # chunk that are not shared with its neighbour.
        overlap_ceiling = settings['chunk_size'] - 50
        settings['chunk_overlap'] = max(
            0, min(settings['chunk_overlap'], overlap_ceiling))

        # Minimum file size limit of 1MB.
        settings['max_file_size'] = max(
            1024 * 1024, settings['max_file_size'])
        return settings
# Module-level Settings instance, created when this module is imported.
settings = Settings()
# Print the active environment name once at import time.
print(f"Current Environment: {settings.ENVIRONMENT}")
# Left disabled: the connection URI can contain credentials.
# print(f"Current MongoDB URI: {settings.MONGODB_URI}")
|