import traceback
from flask import Flask, request, Response
from flask_cors import CORS
import litellm
from util import handle_error
from litellm import completion
import os
import dotenv
import time
import json

dotenv.load_dotenv()

# TODO: set your keys in .env or here:
# os.environ["OPENAI_API_KEY"] = "" # set your openai key here
# os.environ["ANTHROPIC_API_KEY"] = "" # set your anthropic key here
# os.environ["TOGETHER_AI_API_KEY"] = "" # set your together ai key here
# see supported models / keys here: https://litellm.readthedocs.io/en/latest/supported/
######### ENVIRONMENT VARIABLES ##########
verbose = True

# litellm.caching_with_models = True # CACHING: caching_with_models Keys in the cache are messages + model. - to learn more: https://docs.litellm.ai/docs/caching/
######### PROMPT LOGGING ##########
os.environ["PROMPTLAYER_API_KEY"] = (
    ""  # set your promptlayer key here - https://promptlayer.com/
)

# set callbacks
litellm.success_callback = ["promptlayer"]
############ HELPER FUNCTIONS ###################################


def print_verbose(print_statement):
    if verbose:
        print(print_statement)


app = Flask(__name__)
CORS(app)


@app.route("/")
def index():
    return "received!", 200


def data_generator(response):
    # stream each completion chunk back to the client as a server-sent event
    for chunk in response:
        yield f"data: {json.dumps(chunk)}\n\n"


@app.route("/chat/completions", methods=["POST"])
def api_completion():
    data = request.json
    start_time = time.time()
    if data.get("stream") == "True":
        data["stream"] = True  # convert to boolean
    try:
        if "prompt" not in data:
            raise ValueError("data needs to have prompt")
        data["model"] = (
            "togethercomputer/CodeLlama-34b-Instruct"  # by default use Together AI's CodeLlama model - https://api.together.xyz/playground/chat?model=togethercomputer%2FCodeLlama-34b-Instruct
        )
        # COMPLETION CALL
        system_prompt = "Only respond to questions about code. Say 'I don't know' to anything outside of that."
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": data.pop("prompt")},
        ]
        data["messages"] = messages
        print(f"data: {data}")
        response = completion(**data)
        ## LOG SUCCESS
        end_time = time.time()
        print_verbose(f"api_completion() succeeded in {end_time - start_time:.2f}s")
        if data.get("stream") is True:  # use data_generator to stream responses
            return Response(data_generator(response), mimetype="text/event-stream")
    except Exception:
        # call handle_error function
        traceback_exception = traceback.format_exc()
        print_verbose(f"Got Error api_completion(): {traceback_exception}")
        ## LOG FAILURE
        end_time = time.time()
        print_verbose(f"api_completion() failed after {end_time - start_time:.2f}s")
        return handle_error(data=data)
    return response


@app.route("/get_models", methods=["POST"])
def get_models():
    try:
        return litellm.model_list
    except Exception as e:
        traceback.print_exc()
        response = {"error": str(e)}
    return response, 500


if __name__ == "__main__":
    from waitress import serve

    serve(app, host="0.0.0.0", port=4000, threads=500)
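

# ---------------------------------------------------------------------------
# Example client calls (illustrative sketch only; assumes the server is
# running locally on port 4000 as configured above and that the `requests`
# package is installed -- the prompt text and host are placeholders, not part
# of this file):
#
#   import requests
#
#   # non-streaming completion
#   resp = requests.post(
#       "http://localhost:4000/chat/completions",
#       json={"prompt": "Write a function that reverses a string"},
#   )
#   print(resp.text)
#
#   # streaming completion (server-sent events)
#   with requests.post(
#       "http://localhost:4000/chat/completions",
#       json={"prompt": "Write a function that reverses a string", "stream": "True"},
#       stream=True,
#   ) as resp:
#       for line in resp.iter_lines():
#           if line:
#               print(line.decode())
#
#   # list models known to litellm
#   print(requests.post("http://localhost:4000/get_models").json())
# ---------------------------------------------------------------------------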