from abc import ABC, abstractmethod
from litellm.caching import LiteLLMCacheType
import os
import sys
import time
import traceback
import uuid
from dotenv import load_dotenv

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import asyncio
import hashlib
import random

import pytest

import litellm
from litellm.caching import Cache
from litellm import completion, embedding


class LLMCachingUnitTests(ABC):
    @abstractmethod
    def get_cache_type(self) -> LiteLLMCacheType:
        """Return the cache type that a concrete subclass wants these tests to exercise."""
        pass

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_cache_completion(self, sync_mode):
        litellm._turn_on_debug()

        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_number}",
            }
        ]
        cache_type = self.get_cache_type()
        litellm.cache = Cache(
            type=cache_type,
        )

        if sync_mode:
            response1 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )
        else:
            response1 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )

        # response2 is mocked to a different response from response1,
        # but the completion from the cache should be used instead of the mock
        # response since the input is the same as response1
        await asyncio.sleep(0.5)
        if sync_mode:
            response2 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )
        else:
            response2 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )

        if (
            response1["choices"][0]["message"]["content"]
            != response2["choices"][0]["message"]["content"]
        ):  # 1 and 2 should be the same
            # 1 & 2 have the exact same input params. This MUST be a cache hit.
            print(f"response1: {response1}")
            print(f"response2: {response2}")
            pytest.fail(
                f"Error occurred: response1 - {response1['choices'][0]['message']['content']} != response2 - {response2['choices'][0]['message']['content']}"
            )

        # Since the parameters are not the same as response1, response3 should actually
        # be the mock response
        if sync_mode:
            response3 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )
        else:
            response3 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)
        # print("\nresponse 4", response4)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []
# 1 & 2 should be exactly the same | |
# 1 & 3 should be different, since input params are diff | |
if ( | |
response1["choices"][0]["message"]["content"] | |
== response3["choices"][0]["message"]["content"] | |
): | |
# if input params like max_tokens, temperature are diff it should NOT be a cache hit | |
print(f"response1: {response1}") | |
print(f"response3: {response3}") | |
pytest.fail( | |
f"Response 1 == response 3. Same model, diff params shoudl not cache Error" | |
f" occurred:" | |
) | |
assert response1.id == response2.id | |
assert response1.created == response2.created | |
assert ( | |
response1.choices[0].message.content == response2.choices[0].message.content | |
) | |

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_disk_cache_embedding(self, sync_mode):
        litellm._turn_on_debug()

        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
input = [f"hello {random_number}"] | |
litellm.cache = Cache( | |
type="disk", | |
) | |
if sync_mode: | |
response1 = embedding( | |
"openai/text-embedding-ada-002", | |
input=input, | |
caching=True, | |
) | |
else: | |
response1 = await litellm.aembedding( | |
"openai/text-embedding-ada-002", | |
input=input, | |
caching=True, | |
) | |

        # response2 should be served from the cache rather than a fresh embedding call,
        # since the input is the same as response1
        await asyncio.sleep(0.5)
        if sync_mode:
            response2 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        else:
            response2 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )

        if response2._hidden_params["cache_hit"] is not True:
            pytest.fail("Cache hit should be True")

        # Since the parameters are not the same as response1 (a `user` field is added),
        # response3 should not be served from the cache
        if sync_mode:
            response3 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                user="charlie",
                caching=True,
            )
        else:
            response3 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
                user="charlie",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)
        # print("\nresponse 4", response4)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []

        # 1 & 2 should be exactly the same
        # 1 & 3 should be different, since input params are diff
        if response3._hidden_params.get("cache_hit") is True:
            pytest.fail("Cache hit should not be True")
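

# --- Illustrative usage (a minimal sketch, not part of the original module) ---
# A concrete subclass implements get_cache_type() so pytest runs the shared tests
# above against a specific cache backend. The class name below is hypothetical;
# LiteLLMCacheType is the enum imported from litellm.caching, and DISK is assumed
# to be one of its members (swap in another member to target a different backend).
class TestDiskCacheUnitTests(LLMCachingUnitTests):
    def get_cache_type(self) -> LiteLLMCacheType:
        # Exercise the on-disk cache for every test defined in the base class;
        # the sync_mode parametrization covers both sync and async call paths.
        return LiteLLMCacheType.DISK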