---
# Service configuration: the local server, the LLM server and its routes,
# and model defaults.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML. The nesting below was reconstructed from key order
# (`endpoints` under `llm_server`, `model` at top level) -- confirm against
# the code that loads this file.

# Bind address and limits for this service.
server:
  host: "0.0.0.0"
  port: 8002
  timeout: 60  # presumably seconds -- TODO confirm with the consumer
  max_batch_size: 1

# Connection settings and route table for the LLM server.
# NOTE(review): host/port are identical to `server` above -- verify this is
# intentional and not a port clash when both run on the same machine.
llm_server:
  host: "0.0.0.0"
  port: 8002  # Will be ignored for hf.space URLs
  timeout: 60.0
  api_prefix: "/v1"  # This will be used for route prefixing
  # Route paths; presumably joined onto `api_prefix` by the consumer.
  endpoints:
    generate: "/generate"
    generate_stream: "/generate/stream"
    embedding: "/embedding"
    system_status: "/system/status"
    system_validate: "/system/validate"
    model_initialize: "/model/initialize"
    model_initialize_embedding: "/model/initialize/embedding"
    model_download: "/model/download"

# Model settings.
model:
  defaults:
    model_name: "microsoft/Phi-3.5-mini-instruct"