import sys
import os
from dotenv import load_dotenv
load_dotenv()
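# Expects a local .env providing AZURE_API_KEY, AZURE_API_VERSION, AZURE_API_BASE,
# and OPENAI_API_KEY (consumed by model_list below)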
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
from litellm import Router
import litellm
litellm.set_verbose = False
# os.environ.pop("AZURE_AD_TOKEN")
model_list = [
    {  # list of model deployments
        "model_name": "gpt-3.5-turbo",  # model alias
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-2",  # actual model name
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-functioncalling",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
    },
]
router = Router(model_list=model_list)
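# NOTE: the Router is instantiated here, but the load test below does not call it
# directly; it posts to a server on http://0.0.0.0:8000 (assumed to be a litellm
# proxy with request queueing enabled)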
file_paths = [
    "test_questions/question1.txt",
    "test_questions/question2.txt",
    "test_questions/question3.txt",
]
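# Assumes the three question files above exist relative to the working directory;
# a missing file is reported and skipped by the loop below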
questions = []
for file_path in file_paths:
    try:
        print(file_path)
        with open(file_path, "r") as file:
            content = file.read()
            questions.append(content)
    except FileNotFoundError as e:
        print(f"File not found: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
# for q in questions:
#     print(q)

# Make X concurrent calls to litellm.completion(model="gpt-3.5-turbo", messages=[...]),
# picking a random question from the questions list for each call.
# X (the number of concurrent calls) is tunable. Log the question, the
# output/exception, and the response time for each call.
# Finally, print a summary of the requests made: successful calls and failed calls,
# with the exceptions for the failed calls.
import concurrent.futures
import random
import requests
import time
# Worker: submit one question to the local queue endpoint and log the result.
# (Concurrency happens in the ThreadPoolExecutor below, not inside this function.)
def make_openai_completion(question):
    try:
        start_time = time.time()
        data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "system",
                    "content": f"You are a helpful assistant. Answer this question: {question}",
                },
            ],
        }
        response = requests.post("http://0.0.0.0:8000/queue/request", json=data)
        response = response.json()
        end_time = time.time()

        # Log the request details
        with open("request_log.txt", "a") as log_file:
            log_file.write(
                f"Question: {question[:100]}\nResponse ID: {response.get('id', 'N/A')} Url: {response.get('url', 'N/A')}\nTime: {end_time - start_time:.2f} seconds\n\n"
            )
        # Poll the job URL until the queued request finishes
        while True:
            try:
                url = response["url"]
                polling_url = f"http://0.0.0.0:8000{url}"
                polling_response = requests.get(polling_url)
                polling_response = polling_response.json()
                print("\n RESPONSE FROM POLLING JOB", polling_response)
                status = polling_response["status"]
                if status == "finished":
                    llm_response = polling_response["result"]
                    end_time = time.time()  # include polling time in the logged duration
                    with open("response_log.txt", "a") as log_file:
                        log_file.write(
                            f"Response ID: {llm_response.get('id', 'N/A')}\nLLM Response: {llm_response}\nTime: {end_time - start_time:.2f} seconds\n\n"
                        )
                    break
                print(
                    f"POLLING JOB {polling_url}\nSTATUS: {status},\nResponse {polling_response}"
                )
                time.sleep(0.5)
            except Exception as e:
                print("got exception in polling", e)
                break
        return response
    except Exception as e:
        # Log exceptions for failed calls
        with open("error_log.txt", "a") as error_log_file:
            error_log_file.write(f"Question: {question[:100]}\nException: {str(e)}\n\n")
        return None
# Number of concurrent calls (tune this to adjust the load)
concurrent_calls = 10

# List to store the futures of concurrent calls
futures = []

# Make concurrent calls
with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
    for _ in range(concurrent_calls):
        random_question = random.choice(questions)
        futures.append(executor.submit(make_openai_completion, random_question))

# Wait for all futures to complete
concurrent.futures.wait(futures)
# Summarize the results
successful_calls = 0
failed_calls = 0

for future in futures:
    if future.done():
        if future.result() is not None:
            successful_calls += 1
        else:
            failed_calls += 1

print("Load Test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")