Spaces:

TeamGenKI
/

Inference-API

Runtime error

File size: 2,833 Bytes

47031d7
 
 
 
 
 
9814b43
47031d7
c6b21e3
daae8cc
47031d7
15890c0
47031d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6b21e3
 
47031d7
 
 
 
 
02fd6bb
47031d7
b5d6152
02fd6bb
3431979
c6b21e3
b5d6152
7055ca8
02fd6bb
7055ca8
 
02fd6bb
 
47031d7
 
 
c6b21e3
 
 
daae8cc
c6b21e3
daae8cc
100e98e
daae8cc
100e98e
 
daae8cc
 
02fd6bb
 
c6b21e3
5b76cc5
c6b21e3
47031d7
 
c6b21e3
47031d7
 
c6b21e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9814b43
c6b21e3
 
9814b43
b688bec
5b76cc5

"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
import asyncio
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routes import router, init_router
from .api import InferenceApi

def setup_logging():
    """Configure process-wide logging and return this module's logger.

    ``logging.basicConfig`` is called with INFO level and a
    ``time - name - level - message`` record format.

    Returns:
        The ``logging.Logger`` named after this module.
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)
    module_logger = logging.getLogger(__name__)
    return module_logger

def load_config():
    """Load the configuration from ``config.yaml``.

    The file is looked up in the directory that contains this module.

    Returns:
        The parsed YAML document. An empty file yields an empty ``dict``
        instead of ``None`` so callers can use ``.get`` lookups directly.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    config_path = Path(__file__).parent / "config.yaml"
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(config_path, encoding="utf-8") as f:
        loaded = yaml.safe_load(f)
    # safe_load returns None for an empty document.
    return {} if loaded is None else loaded

async def init_app() -> tuple[FastAPI, InferenceApi, dict]:
    """Build the inference API, wrap it in a LitServer and return its app.

    Returns:
        A ``(app, api, config)`` tuple: the FastAPI application taken from
        the LitServer, the ``InferenceApi`` instance it serves, and the
        loaded configuration.

    Raises:
        Exception: Any failure during initialization is logged and re-raised.
    """
    log = setup_logging()

    try:
        settings = load_config()
        server_settings = settings.get('server', {})

        inference_api = InferenceApi(settings)

        # Hand the API instance to the router module before its routes are
        # mounted on the app below.
        await init_router(inference_api)

        lit_server = ls.LitServer(
            inference_api,
            timeout=server_settings.get('timeout', 60),
            max_batch_size=server_settings.get('max_batch_size', 1),
            track_requests=True,
        )

        # The LitServer exposes the FastAPI application it serves.
        fastapi_app = lit_server.app

        # NOTE(review): wildcard origins combined with credentials is very
        # permissive -- confirm this is intended for the deployment.
        cors_options = {
            "allow_origins": ["*"],
            "allow_credentials": True,
            "allow_methods": ["*"],
            "allow_headers": ["*"],
        }
        fastapi_app.add_middleware(CORSMiddleware, **cors_options)

        # Mount the project routes under the configured prefix.
        llm_settings = settings.get('llm_server', {})
        route_prefix = llm_settings.get('api_prefix', '/api/v1')
        fastapi_app.include_router(router, prefix=route_prefix)
    except Exception as e:
        log.error(f"Application initialization failed: {str(e)}")
        raise

    return fastapi_app, inference_api, settings

def _run_init_app():
    """Run ``init_app()`` to completion from synchronous module-level code.

    ``asyncio.get_event_loop().run_until_complete(...)`` is avoided for two
    reasons: calling ``get_event_loop()`` without a current loop is
    deprecated, and ``run_until_complete`` raises ``RuntimeError`` when this
    module is imported from inside an already running loop.

    Returns:
        The ``(app, api, config)`` tuple produced by ``init_app()``.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: drive the coroutine here.
        return asyncio.run(init_app())

    # A loop is already running in this thread, so the coroutine cannot be
    # driven from here. Run it in a short-lived worker thread that gets its
    # own event loop, and block until it finishes.
    from concurrent.futures import ThreadPoolExecutor

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, init_app()).result()


# Create the FastAPI app instance for uvicorn
app, api_instance, config_dict = _run_init_app()

async def run_server():
    """Run the server directly (not through uvicorn).

    Builds a LitServer around the module-level ``api_instance`` using the
    ``server`` section of ``config_dict``, applies the same CORS middleware
    and API routes that ``init_app`` applies to the module-level ``app``,
    and starts it on the configured host and port (defaults ``0.0.0.0`` and
    ``8001``).
    """
    server_config = config_dict.get('server', {})
    port = server_config.get('port', 8001)
    host = server_config.get('host', '0.0.0.0')

    # Create LitServer instance with all required parameters
    server = ls.LitServer(
        api_instance,
        timeout=server_config.get('timeout', 60),
        max_batch_size=server_config.get('max_batch_size', 1),
        track_requests=True
    )

    # This is a separate LitServer from the one built in init_app, so the
    # middleware and routes added there are not on this server's app.
    # Apply them here so a direct run serves the same API.
    server.app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    api_prefix = config_dict.get('llm_server', {}).get('api_prefix', '/api/v1')
    server.app.include_router(router, prefix=api_prefix)

    # NOTE(review): server.run() is called without await and presumably
    # blocks until the server stops -- confirm this coroutine is only ever
    # driven by main().
    server.run(host=host, port=port)

def main():
    """Script entry point: drive ``run_server()`` to completion."""
    server_coro = run_server()
    asyncio.run(server_coro)

# Start the server only when this file is executed as the main module,
# not when it is imported (e.g. to obtain the module-level ``app``).
# NOTE(review): the relative imports at the top of this file mean it has to
# be run as part of its package (``python -m ...``) -- confirm the launch
# command.
if __name__ == "__main__":
    main()