File size: 1,884 Bytes
9b73fc2
 
47031d7
 
 
 
 
 
 
daae8cc
47031d7
15890c0
47031d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daae8cc
 
47031d7
 
 
 
 
 
100e98e
db5664e
47031d7
7055ca8
47031d7
7055ca8
 
47031d7
 
 
 
 
daae8cc
 
 
100e98e
daae8cc
100e98e
 
daae8cc
 
47031d7
 
 
daae8cc
47031d7
 
 
 
 
daae8cc
100e98e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# main.py is the entry point for the application. It creates the FastAPI application instance through LitServe and configures it with the InferenceApi and router.

"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
from pathlib import Path
from fastapi.middleware.cors import CORSMiddleware
from .routes import router, init_router
from .api import InferenceApi

def setup_logging():
    """Configure basic INFO-level logging and return this module's logger.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=log_format, level=logging.INFO)
    module_logger = logging.getLogger(__name__)
    return module_logger

def load_config():
    """Load configuration from the ``config.yaml`` next to this module.

    Returns:
        The parsed YAML document. An empty file (for which
        ``yaml.safe_load`` returns ``None``) yields an empty dict so that
        callers can use ``.get(...)`` on the result.

    Raises:
        OSError: If ``config.yaml`` cannot be opened (e.g. it is missing).
        yaml.YAMLError: If the file is not valid YAML.
    """
    config_path = Path(__file__).parent / "config.yaml"
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default encoding.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return {} if config is None else config

def create_app():
    """Create and configure the application instance.

    Sets up logging, loads the YAML configuration, initialises the router
    with it, wraps an ``InferenceApi`` in a ``LitServer``, and then attaches
    CORS middleware and the API routes (under ``/api/v1``) to the server's
    FastAPI app.

    Returns:
        The FastAPI app exposed by the LitServer instance (``server.app``).

    Raises:
        Exception: Any error raised during initialisation is logged with its
            traceback and re-raised unchanged.
    """
    logger = setup_logging()

    try:
        # Load configuration
        config = load_config()

        # Initialize API and router
        api = InferenceApi()
        init_router(config)

        # An empty config file, or a ``server:`` key with no value, parses to
        # None; fall back to an empty mapping so the default timeout applies
        # instead of failing with AttributeError.
        server_settings = (config or {}).get("server") or {}
        request_timeout = server_settings.get("timeout", 60)

        # Create LitServer instance
        server = ls.LitServer(
            api,
            timeout=request_timeout,
            max_batch_size=1,
            track_requests=True,
        )

        # Add CORS middleware
        # NOTE(review): wildcard origins combined with allow_credentials=True
        # is very permissive — confirm this is intended outside development.
        server.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Add our routes to the server's FastAPI app
        server.app.include_router(router, prefix="/api/v1")

        return server.app

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which a plain logger.error call would drop.
        logger.exception("Server initialization failed: %s", e)
        raise

# Create the application instance at import time and expose it as the
# module-level name `app`. Any error raised by create_app() therefore
# propagates when this module is imported.
app = create_app()