"""LLM Inference Server main application using LitServe framework.

main.py is the entry point for the application. It creates the FastAPI
application instance through LitServe and configures it with the
InferenceApi and router.
"""

import logging
from pathlib import Path

import litserve as ls
import yaml
from fastapi.middleware.cors import CORSMiddleware

from .api import InferenceApi
from .routes import router, init_router


def setup_logging():
    """Set up basic logging configuration."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    return logging.getLogger(__name__)


def load_config():
    """Load configuration from config.yaml located next to this module."""
    config_path = Path(__file__).parent / "config.yaml"
    with open(config_path) as f:
        return yaml.safe_load(f)


def create_app():
    """Create and configure the application instance."""
    logger = setup_logging()
    try:
        # Load configuration
        config = load_config()

        # Initialize API and router
        api = InferenceApi()
        init_router(config)

        # Create LitServer instance
        server = ls.LitServer(
            api,
            timeout=config.get("server", {}).get("timeout", 60),
            max_batch_size=1,
            track_requests=True,
        )

        # Add CORS middleware
        server.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Add our routes to the server's FastAPI app
        server.app.include_router(router, prefix="/api/v1")

        return server.app
    except Exception as e:
        logger.error(f"Server initialization failed: {e}")
        raise


# Create the application instance
app = create_app()
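
# --- Usage sketch (illustrative, not part of the application code) ----------
# Assumption: this module lives in a package such as ``inference_server`` and
# sits next to a ``config.yaml`` shaped roughly like:
#
#     server:
#       timeout: 60
#
# The ``server.timeout`` key is the only setting create_app() reads directly;
# it falls back to 60 seconds when absent. Because create_app() returns the
# underlying FastAPI instance, the module can be served with any ASGI server,
# for example:
#
#     uvicorn inference_server.main:app --host 0.0.0.0 --port 8000
#
# Alternatively, if you keep the LitServer object instead of unwrapping
# ``server.app``, LitServe's own runner (``server.run(port=8000)``) can serve
# the app directly.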