""" LLM Inference Server main application using LitServe framework. """ import litserve as ls import yaml import logging import asyncio from pathlib import Path from fastapi.middleware.cors import CORSMiddleware from .routes import router, init_router from .api import InferenceApi def setup_logging(): """Set up basic logging configuration""" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) return logging.getLogger(__name__) def load_config(): """Load configuration from config.yaml""" config_path = Path(__file__).parent / "config.yaml" with open(config_path) as f: return yaml.safe_load(f) async def async_main(): """Create and configure the application instance asynchronously.""" logger = setup_logging() try: # Load configuration config = load_config() server_config = config.get('server', {}) # Initialize API with config and await setup api = InferenceApi(config) await api.setup() await init_router(config) # Create LitServer instance with config server = ls.LitServer( api, timeout=server_config.get('timeout', 60), max_batch_size=server_config.get('max_batch_size', 1), track_requests=True ) # Add CORS middleware server.app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Add routes with configured prefix api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1') server.app.include_router(router, prefix=api_prefix) # Get configured port port = server_config.get('port', 8001) host = server_config.get('host', 'localhost') # Run server server.run(host=host, port=port) except Exception as e: logger.error(f"Server initialization failed: {str(e)}") raise def main(): """Entry point that runs the async main""" asyncio.run(async_main()) if __name__ == "__main__": main()