Spaces:
Runtime error
Runtime error
File size: 2,833 Bytes
47031d7 9814b43 47031d7 c6b21e3 daae8cc 47031d7 15890c0 47031d7 c6b21e3 47031d7 02fd6bb 47031d7 b5d6152 02fd6bb 3431979 c6b21e3 b5d6152 7055ca8 02fd6bb 7055ca8 02fd6bb 47031d7 c6b21e3 daae8cc c6b21e3 daae8cc 100e98e daae8cc 100e98e daae8cc 02fd6bb c6b21e3 5b76cc5 c6b21e3 47031d7 c6b21e3 47031d7 c6b21e3 9814b43 c6b21e3 9814b43 b688bec 5b76cc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
import asyncio
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routes import router, init_router
from .api import InferenceApi
def setup_logging():
    """Configure process-wide logging and hand back this module's logger.

    Calls ``logging.basicConfig`` with the INFO level and a
    ``time - name - level - message`` record format.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)
    module_logger = logging.getLogger(__name__)
    return module_logger
def load_config():
    """Load configuration from the ``config.yaml`` next to this module.

    Returns:
        The parsed YAML content. An empty file (which ``yaml.safe_load``
        parses to ``None``) is returned as an empty dict so callers can
        always use ``.get`` on the result.

    Raises:
        OSError: If ``config.yaml`` cannot be opened (e.g. it is missing).
        yaml.YAMLError: If the file content is not valid YAML.
    """
    config_path = Path(__file__).parent / "config.yaml"
    # Explicit encoding: the default for open() is platform-dependent.
    with open(config_path, encoding="utf-8") as f:
        # safe_load yields None for an empty document; normalise to {}.
        return yaml.safe_load(f) or {}
async def init_app() -> tuple[FastAPI, InferenceApi, dict]:
    """Initialize and configure the FastAPI application.

    Steps, in order: set up logging, load the configuration, build the
    ``InferenceApi`` from it, initialize the router with that API, wrap the
    API in a ``LitServer``, then attach CORS middleware and the project
    router to the server's app.

    Returns:
        A ``(app, api, config)`` tuple: the app taken from the ``LitServer``,
        the ``InferenceApi`` instance, and the loaded configuration.

    Raises:
        Exception: Any error raised during initialization is logged with its
            traceback and re-raised unchanged.
    """
    logger = setup_logging()
    try:
        # Load configuration
        config = load_config()
        # A section that is present but empty in YAML parses to None;
        # treat it the same as a missing section.
        server_config = config.get('server') or {}

        # Initialize API with config
        api = InferenceApi(config)

        # Initialize router with the API instance
        await init_router(api)

        # Create LitServer instance with config
        server = ls.LitServer(
            api,
            timeout=server_config.get('timeout', 60),
            max_batch_size=server_config.get('max_batch_size', 1),
            track_requests=True,
        )

        # Get the FastAPI app from the LitServer
        app = server.app

        # Add CORS middleware
        # NOTE(review): wildcard origins together with allow_credentials=True
        # is very permissive - confirm this is intended before exposing the
        # server beyond a trusted network.
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Add routes with configured prefix
        llm_config = config.get('llm_server') or {}
        api_prefix = llm_config.get('api_prefix', '/api/v1')
        app.include_router(router, prefix=api_prefix)

        return app, api, config
    except Exception as e:
        # logger.exception keeps the traceback that logger.error would drop.
        logger.exception("Application initialization failed: %s", e)
        raise
def _run_init_blocking():
    """Run ``init_app()`` to completion from synchronous import-time code.

    ``asyncio.get_event_loop()`` is not used here: calling it without a
    current loop is deprecated since Python 3.12 and raises on newer
    versions, and ``run_until_complete`` fails outright when a loop is
    already running in the calling thread.

    Returns:
        Whatever ``init_app()`` returns.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: create one explicitly and
        # leave it installed and open, as the implicit creation used to do.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(init_app())

    # A loop is already running in this thread (presumably the module is
    # being imported from inside a server's event loop - TODO confirm), so
    # initialization runs on its own loop in a short-lived worker thread.
    # NOTE(review): that loop is closed when asyncio.run returns; verify
    # init_app() does not keep objects bound to it.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, init_app()).result()


# Create the FastAPI app instance for uvicorn
app, api_instance, config_dict = _run_init_blocking()
async def run_server():
    """Run the server directly (not through uvicorn).

    Reads the ``server`` section of the module-level ``config_dict``,
    builds a fresh ``LitServer`` around ``api_instance`` with the configured
    timeout and batch size, and calls its ``run`` with the configured host
    and port (defaults: ``0.0.0.0`` and ``8001``).
    """
    settings = config_dict.get('server', {})
    bind_port = settings.get('port', 8001)
    bind_host = settings.get('host', '0.0.0.0')

    # Same LitServer options as used when the app is initialized.
    litserver_options = {
        'timeout': settings.get('timeout', 60),
        'max_batch_size': settings.get('max_batch_size', 1),
        'track_requests': True,
    }
    lit_server = ls.LitServer(api_instance, **litserver_options)
    lit_server.run(host=bind_host, port=bind_port)
def main():
    """Entry point that runs the server directly.

    Drives the ``run_server`` coroutine to completion via ``asyncio.run``.
    """
    server_coroutine = run_server()
    asyncio.run(server_coroutine)
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()