# What is this?
## This tests if the proxy fallbacks work as expected
import asyncio
import time
from typing import Optional

import aiohttp
import pytest
from openai import AsyncOpenAI

from tests.large_text import text


async def generate_key(
    session,
    i,
    models: list,
    calling_key="sk-1234",
):
    url = "http://0.0.0.0:4000/key/generate"
    headers = {
        "Authorization": f"Bearer {calling_key}",
        "Content-Type": "application/json",
    }
    data = {
        "models": models,
    }
    print(f"data: {data}")

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(f"Response {i} (Status code: {status}):")
        print(response_text)
        print()

        if status != 200:
            raise Exception(f"Request {i} did not return a 200 status code: {status}")

        return await response.json()
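
# Note: the JSON returned by /key/generate includes (at least) a "key" field,
# which the tests below use as the Bearer token for subsequent calls; other
# fields in the payload are deployment-specific and not relied on here.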


async def chat_completion(
    session,
    key: str,
    model: str,
    messages: list,
    return_headers: bool = False,
    extra_headers: Optional[dict] = None,
    **kwargs,
):
    url = "http://0.0.0.0:4000/chat/completions"
    headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    if extra_headers is not None:
        headers.update(extra_headers)
    data = {"model": model, "messages": messages, **kwargs}

    async with session.post(url, headers=headers, json=data) as response:
        status = response.status
        response_text = await response.text()

        print(response_text)
        print()

        if status != 200:
            if return_headers:
                return None, response.headers
            else:
                raise Exception(f"Request did not return a 200 status code: {status}")

        if return_headers:
            return await response.json(), response.headers
        else:
            return await response.json()
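

# A minimal standalone sketch (not collected by pytest) of how the helper above
# can be driven outside a test. It assumes the proxy is running locally on port
# 4000 with master key "sk-1234"; the model name is illustrative.
async def _demo_chat_completion():  # hypothetical helper, not part of the suite
    async with aiohttp.ClientSession() as session:
        resp = await chat_completion(
            session=session,
            key="sk-1234",
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "ping"}],
        )
        print(resp)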


@pytest.mark.asyncio
async def test_chat_completion():
    """
    Make a chat completion call with a prompt longer than the model's context
    window. Expect it to work via the proxy's context-window fallback.
    """
    async with aiohttp.ClientSession() as session:
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        await chat_completion(
            session=session, key="sk-1234", model=model, messages=messages
        )
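
# The test above relies on server-side context-window fallbacks configured on
# the proxy. A sketch of the relevant proxy config (YAML shown as a comment
# since this is a Python module; the fallback target is an assumption, not
# verified against the test deployment):
#
#   litellm_settings:
#     context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]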


@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks(has_access):
    """
    Make a chat completion call with client-side fallbacks. A key with access
    to the fallback model should succeed; a key without access should fail.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=["gpt-instruct"],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))


@pytest.mark.asyncio
async def test_chat_completion_with_retries():
    """
    Make a chat completion call that triggers a mock rate-limit error. Expect
    the proxy to retry and to report the retry counts in response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-4"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            mock_testing_rate_limit_error=True,
            return_headers=True,
        )
        print(f"headers: {headers}")
        assert headers["x-litellm-attempted-retries"] == "1"
        assert headers["x-litellm-max-retries"] == "50"


@pytest.mark.asyncio
async def test_chat_completion_with_fallbacks():
    """
    Make a chat completion call against a badly configured model with an
    explicit fallback. Expect the fallback to be used and reported in headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "badly-configured-openai-endpoint"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            fallbacks=["fake-openai-endpoint-5"],
            return_headers=True,
        )
        print(f"headers: {headers}")
        assert headers["x-litellm-attempted-fallbacks"] == "1"


@pytest.mark.asyncio
async def test_chat_completion_with_timeout():
    """
    Make a chat completion call with `mock_timeout: true`. Expect it to fail
    and the model-level timeout to be reported in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-5"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        start_time = time.time()
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            num_retries=0,
            mock_timeout=True,
            return_headers=True,
        )
        end_time = time.time()
        print(f"headers: {headers}, elapsed: {end_time - start_time:.2f}s")
        assert (
            headers["x-litellm-timeout"] == "1.0"
        )  # assert model-specific timeout used
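
# The "1.0" asserted above is presumably the model-level timeout set on the
# proxy for "fake-openai-endpoint-5". A sketch of the assumed model_list entry
# (the underlying model value is illustrative, not taken from the deployment):
#
#   - model_name: fake-openai-endpoint-5
#     litellm_params:
#       model: openai/fake
#       timeout: 1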


@pytest.mark.asyncio
async def test_chat_completion_with_timeout_from_request():
    """
    Make a chat completion call with a low timeout in the "x-litellm-timeout"
    request header and `mock_timeout: true`. Expect it to fail and the
    header-supplied timeout to be echoed back in the response headers.
    """
    async with aiohttp.ClientSession() as session:
        model = "fake-openai-endpoint-5"
        messages = [
            {"role": "system", "content": text},
            {"role": "user", "content": "Who was Alexander?"},
        ]
        extra_headers = {
            "x-litellm-timeout": "0.001",
        }
        start_time = time.time()
        response, headers = await chat_completion(
            session=session,
            key="sk-1234",
            model=model,
            messages=messages,
            num_retries=0,
            mock_timeout=True,
            extra_headers=extra_headers,
            return_headers=True,
        )
        end_time = time.time()
        print(f"headers: {headers}, elapsed: {end_time - start_time:.2f}s")
        assert (
            headers["x-litellm-timeout"] == "0.001"
        )  # assert request-header timeout used
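
# Note: comparing this test with the previous one, a caller-supplied
# "x-litellm-timeout" request header takes precedence over the model-level
# timeout configured on the proxy (0.001 overrides the model's 1.0 here).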


@pytest.mark.parametrize("has_access", [True, False])
@pytest.mark.asyncio
async def test_chat_completion_client_fallbacks_with_custom_message(has_access):
    """
    Same as the client-side fallbacks test above, but the fallback entry
    carries custom messages to use when falling back.
    """
    async with aiohttp.ClientSession() as session:
        models = ["gpt-3.5-turbo"]

        if has_access:
            models.append("gpt-instruct")

        ## CREATE KEY WITH MODELS
        generated_key = await generate_key(session=session, i=0, models=models)
        calling_key = generated_key["key"]
        model = "gpt-3.5-turbo"
        messages = [
            {"role": "user", "content": "Who was Alexander?"},
        ]

        ## CALL PROXY
        try:
            await chat_completion(
                session=session,
                key=calling_key,
                model=model,
                messages=messages,
                mock_testing_fallbacks=True,
                fallbacks=[
                    {
                        "model": "gpt-instruct",
                        "messages": [
                            {
                                "role": "assistant",
                                "content": "This is a custom message",
                            }
                        ],
                    }
                ],
            )
            if not has_access:
                pytest.fail(
                    "Expected this to fail, submitted fallback model that key did not have access to"
                )
        except Exception as e:
            if has_access:
                pytest.fail("Expected this to work: {}".format(str(e)))


async def make_request(client: AsyncOpenAI, model: str) -> bool:
    try:
        await client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Who was Alexander?"}],
        )
        return True
    except Exception as e:
        print(f"Error with {model}: {str(e)}")
        return False


async def run_good_model_test(client: AsyncOpenAI, num_requests: int) -> bool:
    tasks = [make_request(client, "good-model") for _ in range(num_requests)]
    good_results = await asyncio.gather(*tasks)
    return all(good_results)


@pytest.mark.asyncio
async def test_chat_completion_bad_and_good_model():
    """
    Prod test - ensure even if the bad model is down, the good model still works.
    """
    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
    num_requests = 100
    num_iterations = 3

    for iteration in range(num_iterations):
        print(f"\nIteration {iteration + 1}/{num_iterations}")
        start_time = time.time()

        # Fire off bad-model requests without awaiting them; keep references so
        # the tasks aren't garbage-collected while still in flight
        bad_tasks = [
            asyncio.create_task(make_request(client, "bad-model"))
            for _ in range(num_requests)
        ]

        # Wait only for good model requests
        success = await run_good_model_test(client, num_requests)

        print(
            f"Iteration {iteration + 1}: {'✓' if success else '✗'} ({time.time() - start_time:.2f}s)"
        )

        # Drain the bad-model tasks so they don't leak into the next iteration
        await asyncio.gather(*bad_tasks, return_exceptions=True)

        assert success, "Not all good model requests succeeded"