Spaces:

keungliang
/

LiteLLM

Running

keungliang committed on Jun 24

Commit

fa497b5

verified ·

1 Parent(s): e3438df

Upload 2 files

Files changed (2) hide show

Dockerfile ADDED Viewed

+# LiteLLM proxy image: bakes config.yaml into the image and serves on port 7860.
+# NOTE(review): main-latest is a moving tag; consider pinning a version or
+# digest if reproducible builds matter.
+FROM ghcr.io/berriai/litellm:main-latest
+# Container timezone.
+ENV TZ=Asia/Taipei
+# NOTE(review): "any" is a placeholder, not a real key — presumably set only so
+# code that requires OPENAI_API_KEY to exist does not fail; confirm it is needed.
+ENV OPENAI_API_KEY=any
+# Proxy configuration, handed to litellm through --config in CMD.
+COPY ./config.yaml /app/config.yaml
+# Keep in sync with the --port argument in CMD.
+EXPOSE 7860
+ENTRYPOINT ["litellm"]
+# Default arguments appended to ENTRYPOINT.
+CMD ["--config", "/app/config.yaml", "--port", "7860", "--num_workers", "8"]

config.yaml ADDED Viewed

+# LiteLLM proxy configuration: one model entry plus global settings.
+# Values of the form os.environ/NAME reference environment variables.
+model_list:
+  # model_name is the name this entry is listed under in the proxy.
+  - model_name: azure/o3-pro
+    litellm_params:
+      # NOTE(review): this value has no provider prefix, unlike model_name
+      # above; confirm "azure-o3-pro" resolves to the intended deployment.
+      model: azure-o3-pro
+      api_key: os.environ/AZURE_RESPONSES_OPENAI_API_KEY
+      api_base: os.environ/AZURE_RESPONSES_OPENAI_BASE_URL
+      api_version: "preview"
+    model_info:
+      mode: responses  # use "responses" mode for this model
+# --------------Other Settings--------------------
+litellm_settings:
+  # Networking settings
+  request_timeout: 20 # (int) LLM request timeout in seconds. Raise Timeout error if call takes longer than 20s. Sets litellm.request_timeout
+  num_retries: 3
+  # fallbacks: [{ "gemini-1.5-pro": ["gemini-1.5-flash"] }]
+  allowed_fails: 3 # cooldown model if it fails > 3 calls in a minute.
+  cooldown_time: 30 # how long (in seconds) to cooldown model if fails/min > allowed_fails
+  drop_params: true
+general_settings:
+  master_key: os.environ/MASTER_KEY # sk-1234 # [OPTIONAL] Only use this if you require all calls to contain this key (Authorization: Bearer sk-1234)
+# Example router settings, currently disabled:
+# router_settings:
+#  fallbacks:
+#    [
+#      { "or/gemini-2.0-flash-exp": ["gg1/gemini-2.0-flash-exp"] },
+#      { "gpt-3.5-turbo": ["gemini-1.5-flash"] },
+#    ]
+#  model_group_alias: { "gpt-4": "gemini-1.5-pro" }
+#  routing_strategy: simple-shuffle