# Inference-API/main/main.py
"""
LLM Inference Server main application using LitServe framework.
"""
import litserve as ls
import yaml
import logging
import os
from pathlib import Path
from fastapi.middleware.cors import CORSMiddleware
from huggingface_hub import login
from .routes import router, init_router
from .api import InferenceApi

# Keep module-level references to the worker processes and the manager
# so they are not garbage collected while the server runs.
_WORKER_PROCESSES = []
_MANAGER = None


def setup_logging():
"""Set up basic logging configuration"""
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
return logging.getLogger(__name__)


def load_config():
"""Load configuration from config.yaml"""
config_path = Path(__file__).parent / "config.yaml"
with open(config_path) as f:
return yaml.safe_load(f)
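
# For illustration, a minimal config.yaml covering only the keys this module
# reads; the values shown are the fallbacks used in create_app below, not
# necessarily the project's actual settings:
#
#   server:
#     timeout: 60
#     max_batch_size: 1
#   llm_server:
#     api_prefix: /api/v1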


def create_app():
"""Create and configure the application instance."""
global _WORKER_PROCESSES, _MANAGER
logger = setup_logging()
# Log into Hugging Face Hub
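    # The token is read from an env var literally named "InferenceAPI"; on a
    # Hugging Face Space this would typically be provided as a Space secret
    # of the same name (an assumption based on the variable name).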
access_token = os.environ.get("InferenceAPI")
if access_token:
try:
login(token=access_token)
logger.info("Successfully logged into Hugging Face Hub")
except Exception as e:
logger.error(f"Failed to login to Hugging Face Hub: {str(e)}")
else:
        logger.warning("No Hugging Face access token found; continuing without Hub authentication")
config = load_config()
server_config = config.get('server', {})
# Initialize API with config
api = InferenceApi(config)
# Initialize router with API instance
init_router(api)
    # Create the LitServer instance; timeout (in seconds) bounds how long a
    # request may take before it fails, and max_batch_size=1 disables
    # dynamic batching.
server = ls.LitServer(
api,
timeout=server_config.get('timeout', 60),
max_batch_size=server_config.get('max_batch_size', 1),
track_requests=True
)
    # Launch the inference worker processes (a single uvicorn server is
    # assumed for now) and keep the returned manager and process handles
    # alive at module level.
_MANAGER, _WORKER_PROCESSES = server.launch_inference_worker(num_uvicorn_servers=1)
# Get the FastAPI app
app = server.app
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Add routes with configured prefix
api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
app.include_router(router, prefix=api_prefix)
# Set the response queue ID for the app
app.response_queue_id = 0 # Since we're using a single worker
return app


# Create the module-level app instance that uvicorn will import.
app = create_app()
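
# Example invocation (the module path and port are assumptions; Hugging Face
# Spaces conventionally expose port 7860, and the import path depends on the
# repo layout):
#
#   uvicorn main.main:app --host 0.0.0.0 --port 7860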