"""
LLM Inference Server main application using LitServe framework.
"""
import asyncio
import logging
from pathlib import Path

import litserve as ls
import yaml
from fastapi.middleware.cors import CORSMiddleware

from .routes import router, init_router
from .api import InferenceApi


def setup_logging():
    """Set up basic logging configuration."""
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    return logging.getLogger(__name__)


def load_config():
    """Load configuration from config.yaml located next to this module."""
    config_path = Path(__file__).parent / "config.yaml"
    with open(config_path) as f:
        return yaml.safe_load(f)
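

# Illustrative sketch of the config.yaml shape this module reads. The key
# names come from the .get() calls below; the values shown are only the
# fallback defaults used in this file, not the actual shipped configuration:
#
#   server:
#     host: localhost
#     port: 8001
#     timeout: 60
#     max_batch_size: 1
#   llm_server:
#     api_prefix: /api/v1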


async def async_main():
    """Create and configure the application instance asynchronously."""
    logger = setup_logging()
    try:
        # Load configuration
        config = load_config()
        server_config = config.get('server', {})

        # Initialize API with config and await setup
        api = InferenceApi(config)
        await api.setup()
        await init_router(config)

        # Create LitServer instance with config
        server = ls.LitServer(
            api,
            timeout=server_config.get('timeout', 60),
            max_batch_size=server_config.get('max_batch_size', 1),
            track_requests=True
        )

        # Add CORS middleware
        server.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Add routes with configured prefix
        api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
        server.app.include_router(router, prefix=api_prefix)

        # Get configured host and port
        port = server_config.get('port', 8001)
        host = server_config.get('host', 'localhost')

        # Run server
        server.run(host=host, port=port)
    except Exception as e:
        logger.error(f"Server initialization failed: {e}")
        raise


def main():
    """Entry point that runs the async main."""
    asyncio.run(async_main())


if __name__ == "__main__":
    main()
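
# Usage note (assumed package layout, inferred from the relative imports above):
# run this as a module from the repository root, e.g.
#
#   python -m main.main
#
# Invoking the file directly (python main.py) would break the
# `from .routes import ...` / `from .api import ...` relative imports.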