Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Files Community

AurelioAguirre committed on Jan 10

Commit

0af4a83

1 Parent(s): be8d239

changed to uvicorn setup for HF v4

Browse files

Files changed (1) hide show

main/main.py +32 -51

main/main.py CHANGED Viewed

@@ -4,7 +4,6 @@ LLM Inference Server main application using LitServe framework.
 import litserve as ls
 import yaml
 import logging
-import asyncio
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
@@ -24,58 +23,40 @@ def load_config():
     with open(config_path) as f:
         return yaml.safe_load(f)
-# Initialize everything synchronously
-logger = setup_logging()
-config = load_config()
-server_config = config.get('server', {})
-api = InferenceApi(config)
-# Create LitServer instance
-server = ls.LitServer(
-    api,
-    timeout=server_config.get('timeout', 60),
-    max_batch_size=server_config.get('max_batch_size', 1),
-    track_requests=True
-)
-# Get the FastAPI app from LitServer
-app = server.app
-# Add CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-# Add routes with configured prefix
-api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
-app.include_router(router, prefix=api_prefix)
-@app.on_event("startup")
-async def startup_event():
-    """Initialize async components on startup."""
-    # Initialize the router
-    await init_router(api)
-    # Launch the inference worker
-    server.launch_inference_worker()
-@app.on_event("shutdown")
-async def shutdown_event():
-    """Cleanup on shutdown."""
-    server.stop_inference_worker()
-async def run_server():
-    """Run the server directly (not through uvicorn)"""
-    port = server_config.get('port', 8001)
-    host = server_config.get('host', '0.0.0.0')
-    server.run(host=host, port=port)
-def main():
-    """Entry point that runs the server directly"""
-    asyncio.run(run_server())
-if __name__ == "__main__":
-    main()

 import litserve as ls
 import yaml
 import logging
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
     with open(config_path) as f:
         return yaml.safe_load(f)
+def create_app():
+    """Create and configure the application instance.
+
+    Sets up logging, loads the configuration, wraps an ``InferenceApi`` in a
+    LitServe ``LitServer``, and then configures the server's app with CORS
+    middleware and the API router.
+
+    Returns:
+        The app object exposed by ``server.app``, with CORS middleware and
+        the router (mounted under the configured prefix) attached.
+    """
+    # NOTE(review): `logger` is assigned but never used in this function;
+    # presumably setup_logging() is called for its side effects - confirm.
+    logger = setup_logging()
+    config = load_config()
+    # Server options live under the optional 'server' section of the config.
+    server_config = config.get('server', {})
+    # Initialize API with config
+    api = InferenceApi(config)
+    # Create LitServer instance; timeout defaults to 60 and max_batch_size
+    # to 1 when the config does not provide them.
+    server = ls.LitServer(
+        api,
+        timeout=server_config.get('timeout', 60),
+        max_batch_size=server_config.get('max_batch_size', 1),
+        track_requests=True
+    )
+    # Get the FastAPI app
+    app = server.app
+    # Add CORS middleware.
+    # NOTE(review): every origin, method and header is allowed while
+    # allow_credentials is True - confirm this permissive policy is intended.
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # Add routes with configured prefix (falls back to '/api/v1' when
+    # 'llm_server.api_prefix' is not set in the config).
+    api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
+    app.include_router(router, prefix=api_prefix)
+    # NOTE(review): this function does not call init_router(api) and does not
+    # launch the inference worker, both of which the removed startup handler
+    # did - confirm they are handled elsewhere.
+    return app
+# Create the app instance for uvicorn. This runs at import time, so importing
+# this module performs the full create_app() setup.
+app = create_app()