Spaces:
Runtime error
Runtime error
File size: 1,591 Bytes
47031d7 a4e24d4 47031d7 a4e24d4 7055ca8 47031d7 7055ca8 47031d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
from pathlib import Path
from .routes import router, init_router
from api import InferenceApi
def setup_logging():
    """Configure basic logging and return this module's logger.

    Calls ``logging.basicConfig`` with level INFO and a
    ``timestamp - logger name - level - message`` record format.

    Returns:
        The logger named after this module (``__name__``).
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(level=logging.INFO, format=record_format)
    module_logger = logging.getLogger(__name__)
    return module_logger
def load_config():
    """Load configuration from config.yaml.

    The file is looked up in the directory that contains this module and
    parsed with ``yaml.safe_load``.

    Returns:
        The parsed YAML content. An empty file, which ``safe_load`` parses
        as ``None``, is returned as an empty dict so callers can use
        ``.get()`` lookups without a None check.

    Raises:
        OSError: if config.yaml cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    config_path = Path(__file__).parent / "config.yaml"
    # Read as UTF-8 explicitly rather than depending on the locale default.
    with config_path.open(encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return {} if config is None else config
def main():
    """Main function to set up and run the inference server.

    Loads the configuration, initializes the router with it, wraps an
    ``InferenceApi`` instance in a ``LitServer``, mounts the router under
    ``/api/v1`` and runs the server on the configured port.

    Settings read from the ``server`` section of the config:
        timeout: passed to ``LitServer`` (default 60).
        port: port the server runs on (default 8001).

    Raises:
        Exception: any error raised during setup or by ``server.run`` is
            logged with its traceback and re-raised unchanged.
    """
    logger = setup_logging()
    try:
        # Load configuration
        config = load_config()

        # Initialize the router with our config
        init_router(config)

        api = InferenceApi()

        # A "server:" key with no children parses as None, so .get()'s
        # default alone is not enough; fall back to an empty mapping.
        server_config = config.get("server") or {}

        # Create LitServer instance
        server = ls.LitServer(
            api,
            timeout=server_config.get("timeout", 60),
            max_batch_size=1,
            track_requests=True,
        )

        # Add our routes to the server's FastAPI app
        server.app.include_router(router, prefix="/api/v1")

        # Get port from config or use default
        port = server_config.get("port", 8001)
        logger.info("Starting server on port %s", port)
        server.run(port=port)
    except Exception as e:
        # logger.exception keeps the traceback in the log record, which
        # survives even when stderr is not captured.
        logger.exception("Server initialization failed: %s", e)
        raise
# Run the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()