Inference-API / main / main.py
"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
import multiprocessing as mp
from pathlib import Path
from fastapi.middleware.cors import CORSMiddleware
from .routes import router, init_router
from .api import InferenceApi

# Store process list globally so it doesn't get garbage collected
_WORKER_PROCESSES = []
_MANAGER = None


def setup_logging():
    """Set up basic logging configuration"""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    return logging.getLogger(__name__)
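
# For reference, a record produced by the format above looks roughly like the
# line below; the logger name depends on the import path, so it is an
# assumption, not observed output:
#
#   2024-01-01 12:00:00,000 - main.main - INFO - Server starting
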
def load_config():
"""Load configuration from config.yaml"""
config_path = Path(__file__).parent / "config.yaml"
with open(config_path) as f:
return yaml.safe_load(f)
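
# For reference, the keys read in this module imply a config.yaml shaped
# roughly like the sketch below. The values shown are the code's own
# fallback defaults; the overall layout is an assumption, not a copy of
# the real file:
#
#   server:
#     timeout: 60
#     max_batch_size: 1
#   llm_server:
#     api_prefix: /api/v1
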
def create_app():
"""Create and configure the application instance."""
global _WORKER_PROCESSES, _MANAGER
logger = setup_logging()
config = load_config()
server_config = config.get('server', {})
# Initialize API with config
api = InferenceApi(config)
# Initialize router with API instance
init_router(api)
# Create LitServer instance
server = ls.LitServer(
api,
timeout=server_config.get('timeout', 60),
max_batch_size=server_config.get('max_batch_size', 1),
track_requests=True
)
# Launch inference workers (assuming single uvicorn worker for now)
_MANAGER, _WORKER_PROCESSES = server.launch_inference_worker(num_uvicorn_servers=1)
# Get the FastAPI app
app = server.app
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Add routes with configured prefix
api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
app.include_router(router, prefix=api_prefix)
# Set the response queue ID for the app
app.response_queue_id = 0 # Since we're using a single worker
return app
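# Note: launch_inference_worker(num_uvicorn_servers=1) and response_queue_id=0
# are kept in sync here under the single-uvicorn-worker assumption stated in
# the comments above; running more than one server process would presumably
# need a distinct queue ID per worker (an assumption about LitServe internals,
# not verified against its source).
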
# Create the app instance for uvicorn
app = create_app()
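
# A typical way to serve this app with uvicorn; the module path and port below
# are assumptions that depend on the package layout and deployment target:
#
#   uvicorn main.main:app --host 0.0.0.0 --port 7860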