Spaces:
Running
Running
File size: 4,460 Bytes
640b1c8 aee2bfd 640b1c8 415595f 640b1c8 415595f b953016 640b1c8 415595f 640b1c8 415595f acdfaa9 c2e0ca0 b08d8ce 415595f 5bfdeda 0314a77 12a98d5 0314a77 5bfdeda 82b8aa2 640b1c8 415595f 9700f95 415595f aee2bfd 640b1c8 aee2bfd 415595f 2e372c4 3a56021 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# config/config.py
import os
from dotenv import load_dotenv
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
# Load environment variables. This call has to stay above the Settings class:
# the class attributes read os.getenv() while the class body executes at
# import time, so anything load_dotenv() provides must be in place by then.
load_dotenv()
class Settings:
    """Central application configuration, resolved from environment variables.

    Every attribute is evaluated once, when the class body executes at
    import time; later changes to the process environment are not picked up.
    Numeric settings are converted with ``int()``, so a non-numeric value in
    the environment raises ``ValueError`` at import.
    """

    # OpenAI Configuration
    OPENAI_API_KEY = os.getenv('OPENAI_API_KEY', '')
    OPENAI_MODEL = os.getenv('OPENAI_MODEL', 'gpt-3.5-turbo')

    # SECURITY(review): this key used to be hard-coded with no override. It
    # can now be supplied through the ADMIN_API_KEY environment variable; the
    # literal fallback is kept only so existing deployments keep working.
    # TODO: rotate this key and remove the in-source default.
    ADMIN_API_KEY = os.getenv(
        'ADMIN_API_KEY', 'aca4081f-6ff2-434c-843b-98f60285c499')

    # Ollama Configuration
    OLLAMA_BASE_URL = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
    OLLAMA_MODEL = os.getenv('OLLAMA_MODEL', 'llama2')

    # Anthropic Configuration
    ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY', '')

    # Top number of chunks to retrieve.
    TOP_CHUNKS = int(os.getenv('TOP_CHUNKS', '10'))

    # Environment Configuration
    # A default is required here: os.getenv() returns None when the variable
    # is unset, and calling .lower() on None would crash the import.
    ENVIRONMENT = os.getenv('ENVIRONMENT', 'development').lower()

    # Embedding Configuration - BAAI model.
    # Fixed value; it is not read from the environment.
    EMBEDDING_MODEL = 'BAAI/bge-large-en-v1.5'

    # MongoDB Configuration
    # No built-in default: an empty string means no URI was configured.
    # Never commit a connection string containing credentials to this file.
    MONGODB_URI = os.getenv('MONGODB_URI', '')

    # Conversation Summarizer Settings
    SUMMARIZER_CONFIG = {
        # 'facebook/bart-large-cnn' for a bigger and better model
        'model_name': os.getenv('SUMMARIZER_MODEL', 'facebook/bart-base'),
        'max_length': int(os.getenv('SUMMARIZER_MAX_LENGTH', '130')),
        'min_length': int(os.getenv('SUMMARIZER_MIN_LENGTH', '30')),
        'device': -1,  # CPU
        'model_kwargs': {
            'low_cpu_mem_usage': True,
        },
    }

    # Vector Store Configuration
    CHROMA_PATH = os.getenv('CHROMA_PATH', './chroma_db')

    # Feedback Configuration
    MAX_RATING = int(os.getenv('MAX_RATING', '5'))

    # Temporary directory for downloaded files
    TEMP_DOWNLOAD_DIR = os.getenv('TEMP_DOWNLOAD_DIR', './temp_downloads')

    # Application Configuration
    # Accept the usual truthy spellings case-insensitively; the previous
    # exact comparison with 'True' silently ignored values such as 'true'.
    DEBUG = os.getenv('DEBUG', 'False').strip().lower() in ('true', '1', 'yes')

    # Google Drive Configuration
    GOOGLE_DRIVE_FOLDER_ID = os.getenv('GOOGLE_DRIVE_FOLDER_ID', '')
    GOOGLE_SERVICE_ACCOUNT_PATH = os.getenv(
        'GOOGLE_SERVICE_ACCOUNT_PATH', 'service_account.json')

    # Use explicit type conversion to ensure correct types
    DOCUMENT_PROCESSOR = {
        'chunk_size': int(os.getenv('DOCUMENT_CHUNK_SIZE', '1000')),
        'chunk_overlap': int(os.getenv('DOCUMENT_CHUNK_OVERLAP', '200')),
        # 20MB in bytes
        'max_file_size': int(
            os.getenv('DOCUMENT_MAX_FILE_SIZE', str(20 * 1024 * 1024))),
        'supported_formats': [
            '.txt', '.pdf', '.docx', '.csv', '.json',
            '.html', '.md', '.xml', '.rtf', '.xlsx', '.xls',
        ],
    }

    @classmethod
    def get_document_processor_settings(cls) -> dict:
        """Return a validated copy of ``DOCUMENT_PROCESSOR``.

        The returned dict and its ``supported_formats`` list are independent
        copies, so callers may mutate them without altering the class-level
        configuration.

        Returns:
            dict: Settings with ``chunk_size`` raised to at least 100,
            ``chunk_overlap`` limited to the range
            ``0 .. chunk_size - 50``, and ``max_file_size`` raised to at
            least 1MB.
        """
        settings = dict(cls.DOCUMENT_PROCESSOR)
        # dict() is a shallow copy; duplicate the list so it is not shared.
        settings['supported_formats'] = list(settings['supported_formats'])

        # Minimum chunk size of 100
        settings['chunk_size'] = max(100, settings['chunk_size'])

        # Overlap must stay below the chunk size and must not be negative.
        # chunk_size is at least 100 here, so the upper bound is at least 50.
        overlap_limit = settings['chunk_size'] - 50
        settings['chunk_overlap'] = max(
            0, min(settings['chunk_overlap'], overlap_limit))

        # Minimum 1MB
        settings['max_file_size'] = max(
            1024 * 1024, settings['max_file_size'])
        return settings
# Module-level instance for callers to import. All configuration values are
# class attributes, so the instance reads the same values as Settings itself.
settings = Settings()
# Runs at import time: prints the resolved environment name to stdout.
print(f"Current Environment: {settings.ENVIRONMENT}")
# NOTE(review): keep the URI print disabled; a MongoDB URI may embed credentials.
# print(f"Current MongoDB URI: {settings.MONGODB_URI}")
|