""" LLM Inference Server main application using LitServe framework. """ import litserve as ls import yaml import logging import asyncio from pathlib import Path from fastapi.middleware.cors import CORSMiddleware from .routes import router, init_router from .api import InferenceApi def setup_logging(): """Set up basic logging configuration""" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) return logging.getLogger(__name__) def load_config(): """Load configuration from config.yaml""" config_path = Path(__file__).parent / "config.yaml" with open(config_path) as f: return yaml.safe_load(f) async def async_main(): """Create and configure the application instance asynchronously.""" logger = setup_logging() try: # Load configuration config = load_config() # Initialize API and router with await api = InferenceApi() await api.setup() # Properly await the setup await init_router(config) # Modified to be async # Create LitServer instance server = ls.LitServer( api, timeout=config.get("server", {}).get("timeout", 60), max_batch_size=1, track_requests=True ) # Add CORS middleware server.app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Add our routes to the server's FastAPI app server.app.include_router(router, prefix="/api/v1") port = config.get("server", {}).get("port", 8001) # Get the server instance from our app and run it server.run(port=port) except Exception as e: logger.error(f"Server initialization failed: {str(e)}") raise def main(): """Entry point that runs the async main""" asyncio.run(async_main()) if __name__ == "__main__": main()