AurelioAguirre committed
Commit f71fa9b · 1 Parent(s): 0af4a83

changed to uvicorn setup for HF v5

Files changed (1):
  main/main.py  +13 -0
main/main.py CHANGED
@@ -4,11 +4,16 @@ LLM Inference Server main application using LitServe framework.
 import litserve as ls
 import yaml
 import logging
+import multiprocessing as mp
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .api import InferenceApi
 
+# Store process list globally so it doesn't get garbage collected
+_WORKER_PROCESSES = []
+_MANAGER = None
+
 def setup_logging():
     """Set up basic logging configuration"""
     logging.basicConfig(
@@ -25,6 +30,8 @@ def load_config():
 
 def create_app():
     """Create and configure the application instance."""
+    global _WORKER_PROCESSES, _MANAGER
+
     logger = setup_logging()
     config = load_config()
     server_config = config.get('server', {})
@@ -40,6 +47,9 @@ def create_app():
         track_requests=True
     )
 
+    # Launch inference workers (assuming single uvicorn worker for now)
+    _MANAGER, _WORKER_PROCESSES = server.launch_inference_worker(num_uvicorn_servers=1)
+
     # Get the FastAPI app
     app = server.app
 
@@ -56,6 +66,9 @@ def create_app():
     api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
     app.include_router(router, prefix=api_prefix)
 
+    # Set the response queue ID for the app
+    app.response_queue_id = 0  # Since we're using a single worker
+
     return app
 
 # Create the app instance for uvicorn
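
For reference, a minimal sketch of how the resulting app might be served under uvicorn on the Space. The entry-point file name, the module path main.main, and port 7860 (the conventional Hugging Face Spaces port) are assumptions, not part of this commit:

# run.py - hypothetical entry point; assumes create_app() is importable from main.main
import uvicorn

from main.main import create_app

# Module-level instance, matching the "Create the app instance for uvicorn" comment above.
# Creating it also launches the LitServe inference worker and sets app.response_queue_id = 0.
app = create_app()

if __name__ == "__main__":
    # Bind to all interfaces on the assumed Spaces port; adjust host/port as needed.
    uvicorn.run(app, host="0.0.0.0", port=7860)

Equivalently, running "uvicorn main.main:app --host 0.0.0.0 --port 7860" would pick up a module-level app instance directly, assuming one is defined after the final context line shown in the diff.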