Spaces:

TeamGenKI
/

Inference-API

Runtime error

AurelioAguirre committed on Jan 10

Commit

f71fa9b

1 Parent(s): 0af4a83

changed to uvicorn setup for HF v5

Files changed (1) hide show

main/main.py CHANGED Viewed

@@ -4,11 +4,16 @@ LLM Inference Server main application using LitServe framework.
 import litserve as ls
 import yaml
 import logging
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .api import InferenceApi
 def setup_logging():
     """Set up basic logging configuration"""
     logging.basicConfig(
@@ -25,6 +30,8 @@ def load_config():
 def create_app():
     """Create and configure the application instance."""
     logger = setup_logging()
     config = load_config()
     server_config = config.get('server', {})
@@ -40,6 +47,9 @@ def create_app():
         track_requests=True
     )
     # Get the FastAPI app
     app = server.app
@@ -56,6 +66,9 @@ def create_app():
     api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
     app.include_router(router, prefix=api_prefix)
     return app
 # Create the app instance for uvicorn

 import litserve as ls
 import yaml
 import logging
+import multiprocessing as mp
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .api import InferenceApi
+# Store process list globally so it doesn't get garbage collected
+_WORKER_PROCESSES = []
+_MANAGER = None
 def setup_logging():
     """Set up basic logging configuration"""
     logging.basicConfig(
 def create_app():
     """Create and configure the application instance."""
+    global _WORKER_PROCESSES, _MANAGER
     logger = setup_logging()
     config = load_config()
     server_config = config.get('server', {})
         track_requests=True
     )
+    # Launch inference workers (assuming single uvicorn worker for now)
+    _MANAGER, _WORKER_PROCESSES = server.launch_inference_worker(num_uvicorn_servers=1)
     # Get the FastAPI app
     app = server.app
     api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
     app.include_router(router, prefix=api_prefix)
+    # Set the response queue ID for the app
+    app.response_queue_id = 0  # Since we're using a single worker
     return app
 # Create the app instance for uvicorn