---
# HTTP server settings for this service.
server:
  host: "0.0.0.0"     # bind on all interfaces
  port: 8002
  timeout: 60         # seconds — presumably request timeout; confirm against server code
  max_batch_size: 1

# Upstream LLM server this service talks to.
llm_server:
  base_url: "http://0.0.0.0:8001"  # "https://teamgenki-llmserver.hf.space:7860"
  timeout: 60.0
  api_prefix: "/api/v1"  # This will be used for route prefixing
  # Route paths appended to api_prefix when calling the LLM server.
  endpoints:
    generate: "/generate"
    generate_stream: "/generate/stream"
    embedding: "/embedding"
    system_status: "/system/status"
    system_validate: "/system/validate"
    model_initialize: "/model/initialize"
    model_initialize_embedding: "/model/initialize/embedding"
    model_download: "/model/download"