Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Files Community

Inference-API / main /main.py

AurelioAguirre

Added an OpenAI-schema-based endpoint and response

02fd6bb 6 months ago

raw

history blame

2.21 kB

	"""
	LLM Inference Server main application using LitServe framework.
	"""
	import litserve as ls
	import yaml
	import logging
	import asyncio
	from pathlib import Path
	from fastapi.middleware.cors import CORSMiddleware
	from .routes import router, init_router
	from .api import InferenceApi

	def setup_logging():
	    """Configure root logging at INFO level and return this module's logger.

	    Each record is rendered as "timestamp - logger name - level - message".
	    """
	    record_layout = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	    logging.basicConfig(format=record_layout, level=logging.INFO)
	    module_logger = logging.getLogger(__name__)
	    return module_logger

	def load_config():
	    """Load the server configuration from ``config.yaml`` next to this module.

	    Returns:
	        The parsed YAML document. An empty file, which ``yaml.safe_load``
	        parses to ``None``, yields an empty dict so callers can use
	        ``.get`` without a ``None`` check.

	    Raises:
	        OSError: If ``config.yaml`` cannot be opened (for example, it does
	            not exist).
	        yaml.YAMLError: If the file content is not valid YAML.
	    """
	    config_path = Path(__file__).parent / "config.yaml"
	    # Decode explicitly as UTF-8 so parsing does not depend on the
	    # platform's locale encoding.
	    with open(config_path, encoding="utf-8") as f:
	        loaded = yaml.safe_load(f)
	    return {} if loaded is None else loaded

	async def async_main():
	    """Build, configure and run the inference server.

	    Steps, in order: load the configuration, set up the ``InferenceApi``
	    and the router, create the ``LitServer``, attach the CORS middleware
	    and the API routes, then call ``server.run``.

	    Raises:
	        Exception: Any failure in the steps above is logged together with
	            its traceback and then re-raised unchanged.
	    """
	    logger = setup_logging()

	    try:
	        # An empty config file or a null section must not crash the
	        # ``.get`` lookups below, so fall back to empty mappings.
	        config = load_config() or {}
	        server_config = config.get('server') or {}
	        llm_config = config.get('llm_server') or {}

	        # Initialize the API and the router before the server is created.
	        api = InferenceApi(config)
	        await api.setup()
	        await init_router(config)

	        # Create the LitServer instance from the 'server' config section.
	        server = ls.LitServer(
	            api,
	            timeout=server_config.get('timeout', 60),
	            max_batch_size=server_config.get('max_batch_size', 1),
	            track_requests=True,
	        )

	        # NOTE(review): wildcard origins combined with
	        # allow_credentials=True is very permissive; confirm this is
	        # intended before exposing the server beyond trusted networks.
	        server.app.add_middleware(
	            CORSMiddleware,
	            allow_origins=["*"],
	            allow_credentials=True,
	            allow_methods=["*"],
	            allow_headers=["*"],
	        )

	        # Mount the routes under the configured prefix.
	        api_prefix = llm_config.get('api_prefix', '/api/v1')
	        server.app.include_router(router, prefix=api_prefix)

	        host = server_config.get('host', 'localhost')
	        port = server_config.get('port', 8001)

	        # NOTE(review): server.run is called without await inside a
	        # coroutine; presumably it blocks until shutdown, which would
	        # occupy the event loop for the server's lifetime - confirm.
	        server.run(host=host, port=port)

	    except Exception as e:
	        # logger.exception keeps the original message and also records
	        # the traceback, which a plain error() call would drop.
	        logger.exception("Server initialization failed: %s", e)
	        raise

	def main():
	    """Synchronous entry point: drive ``async_main`` to completion."""
	    bootstrap = async_main()
	    asyncio.run(bootstrap)


	# Start the server only when this module is executed as the main program,
	# not when it is imported.
	if __name__ == "__main__":
	    main()