Spaces:
Runtime error
Runtime error
File size: 2,128 Bytes
47031d7 daae8cc 47031d7 0e6d5fd 47031d7 daae8cc 47031d7 97968dc 7055ca8 47031d7 7055ca8 47031d7 daae8cc 47031d7 daae8cc 47031d7 daae8cc 47031d7 daae8cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
from pathlib import Path
from fastapi.middleware.cors import CORSMiddleware
from .routes import router, init_router
from .api import InferenceApi
def setup_logging():
    """Configure root logging and return the logger for this module.

    Calls ``logging.basicConfig`` with the INFO level and a
    ``timestamp - logger name - level - message`` record format.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)
    module_logger = logging.getLogger(__name__)
    return module_logger
def load_config():
    """Load configuration from the ``config.yaml`` next to this module.

    Returns:
        The parsed YAML document. An empty ``config.yaml`` yields an empty
        dict instead of ``None`` so callers can safely chain ``.get(...)``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened (e.g. it is missing).
        yaml.YAMLError: If the file is not valid YAML.
    """
    config_path = Path(__file__).parent / "config.yaml"
    # Explicit encoding: do not depend on the platform's default locale.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    # safe_load returns None for an empty document; normalise that to {}.
    return {} if config is None else config
def create_app():
    """Create and configure the application instance.

    Loads the configuration, initialises the router with it, wraps an
    ``InferenceApi`` in a ``LitServer``, adds CORS middleware and mounts the
    project routes under ``/api/v1``.

    The ``LitServer`` is also stored on the returned app as
    ``app.state.lit_server`` so that code holding only the app (such as the
    ``__main__`` entry point below) can still start the server.

    Returns:
        The ``server.app`` object of the configured ``LitServer``.

    Raises:
        Exception: Any error raised during initialisation is logged with its
            traceback and re-raised unchanged.
    """
    logger = setup_logging()
    try:
        # Load configuration
        config = load_config()

        # Initialize the router with our config
        init_router(config)
        api = InferenceApi(config)

        # `or {}` also covers a `server:` key that is present but null.
        server_config = config.get("server") or {}

        # Create LitServer instance
        server = ls.LitServer(
            api,
            timeout=server_config.get("timeout", 60),
            max_batch_size=1,
            track_requests=True,
        )

        # Add CORS middleware
        # NOTE(review): wildcard origins together with allow_credentials=True
        # is very permissive; consider explicit origins for production.
        server.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],  # Allows all origins
            allow_credentials=True,
            allow_methods=["*"],  # Allows all methods
            allow_headers=["*"],  # Allows all headers
        )

        # Add our routes to the server's FastAPI app
        server.app.include_router(router, prefix="/api/v1")

        # Keep a handle on the LitServer: the app itself exposes no reference
        # back to it, and the __main__ entry point needs it to call run().
        server.app.state.lit_server = server
        return server.app
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Server initialization failed: %s", e)
        raise


# Create the application instance
app = create_app()

if __name__ == "__main__":
    # Direct execution path. Because this module uses relative imports, it
    # has to be started as a module of its package (python -m ...).
    config = load_config()
    port = (config.get("server") or {}).get("port", 8001)
    # Run the LitServer that create_app() attached to the app's state.
    app.state.lit_server.run(port=port)