import json
import os
import time
import traceback

import dotenv
import litellm
from flask import Flask, Response, request
from flask_cors import CORS
from litellm import completion

from util import handle_error

dotenv.load_dotenv()
# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = ""  # set your openai key here
# os.environ["ANTHROPIC_API_KEY"] = ""  # set your anthropic key here
# os.environ["TOGETHER_AI_API_KEY"] = ""  # set your together ai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
######### ENVIRONMENT VARIABLES ##########
verbose = True

# CACHING: with caching_with_models, cache keys are messages + model.
# To learn more: https://docs.litellm.ai/docs/caching/
# litellm.caching_with_models = True
######### PROMPT LOGGING ##########
os.environ["PROMPTLAYER_API_KEY"] = ""  # set your promptlayer key here - https://promptlayer.com/

# set callbacks - litellm logs every successful completion to PromptLayer
litellm.success_callback = ["promptlayer"]
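# litellm also exposes a failure_callback list if you want errored calls logged as
# well - a sketch, assuming the integration is enabled in your litellm version
# (see https://docs.litellm.ai/ for the supported callback names):
# litellm.failure_callback = ["sentry"]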
############ HELPER FUNCTIONS ###################################


def print_verbose(print_statement):
    if verbose:
        print(print_statement)


app = Flask(__name__)
CORS(app)


@app.route("/")  # health check
def index():
    return "received!", 200


def data_generator(response):
    # re-emit each streamed chunk as a server-sent event
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"
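# Each yielded event looks roughly like this on the wire (an illustrative,
# OpenAI-style streaming delta - the exact chunk fields depend on the provider):
#
#   data: {"choices": [{"delta": {"content": "Hello"}}]}
#
# followed by a blank line, which the trailing "\n\n" provides.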


@app.route("/chat/completions", methods=["POST"])
def api_completion():
    data = request.json
    start_time = time.time()  # placeholder for latency logging
    if data.get("stream") == "True":
        data["stream"] = True  # convert to boolean
    try:
        if "prompt" not in data:
            raise ValueError("request body must include a 'prompt' field")
        # default to Together AI's CodeLlama model if the request doesn't specify one
        # https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
        data.setdefault("model", "togethercomputer/CodeLlama-34b-Instruct")
        # COMPLETION CALL
        system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": data.pop("prompt")},
        ]
        data["messages"] = messages
        print_verbose(f"data: {data}")
        response = completion(**data)
        ## LOG SUCCESS
        end_time = time.time()  # placeholder for latency logging
        if "stream" in data and data["stream"] is True:
            # use data_generator to stream responses back as server-sent events
            return Response(data_generator(response), mimetype="text/event-stream")
    except Exception:
        # log the traceback, then hand the request off to the shared error handler
        print_verbose(f"Got Error api_completion(): {traceback.format_exc()}")
        ## LOG FAILURE
        end_time = time.time()  # placeholder for latency logging
        traceback_exception = traceback.format_exc()
        return handle_error(data=data)
    return response
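# Example request against the endpoint above (path and port as configured in this
# file; "stream" is sent as the string "True" and converted to a boolean by the
# handler):
#
#   curl -X POST http://localhost:4000/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"prompt": "Write a function that reverses a string", "stream": "True"}'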


@app.route("/models")
def get_models():
    try:
        # Flask 2.2+ serializes a returned list to JSON directly
        return litellm.model_list
    except Exception as e:
        traceback.print_exc()
        response = {"error": str(e)}
        return response, 500
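# Quick check of the endpoint above once the server is running:
#
#   curl http://localhost:4000/models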


if __name__ == "__main__":
    from waitress import serve

    serve(app, host="0.0.0.0", port=4000, threads=500)
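# Minimal streaming client sketch (assumes the server above is running locally and
# that the `requests` package is installed - illustrative only):
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:4000/chat/completions",
#       json={"prompt": "Explain list comprehensions", "stream": "True"},
#       stream=True,
#   )
#   for line in resp.iter_lines():
#       if line.startswith(b"data: "):
#           print(line[len(b"data: ") :].decode())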