""" LLM Inference Server main application using LitServe framework. """ import litserve as ls import yaml import logging from pathlib import Path from fastapi.middleware.cors import CORSMiddleware from .routes import router, init_router from .api import InferenceApi def setup_logging(): """Set up basic logging configuration""" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) return logging.getLogger(__name__) def load_config(): """Load configuration from config.yaml""" config_path = Path(__file__).parent / "config.yaml" with open(config_path) as f: return yaml.safe_load(f) def create_app(): """Create and configure the application instance.""" logger = setup_logging() try: # Load configuration config = load_config() # Initialize API and router api = InferenceApi() init_router(config) # Create LitServer instance server = ls.LitServer( api, timeout=config.get("server", {}).get("timeout", 60), max_batch_size=1, track_requests=True ) # Add CORS middleware server.app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) # Add our routes to the server's FastAPI app server.app.include_router(router, prefix="/api/v1") return server.app except Exception as e: logger.error(f"Server initialization failed: {str(e)}") raise # Create the application instance app = create_app() if __name__ == "__main__": # If we run this directly with python, we can still use the old method config = load_config() port = config.get("server", {}).get("port", 8001) # Get the server instance from our app and run it app.parent.run(port=port) # assuming the LitServer instance is stored as parent