DevsDoCode committed
Commit 0c11451 · verified · 1 parent: c7e1476

Upload 2 files

Files changed (2)
  1. Dockerfile +5 -14
  2. app.py +160 -0
Dockerfile CHANGED
@@ -1,20 +1,11 @@
-# Use an official Python runtime as the base image
-FROM python:3.9-slim
+FROM python:3.10
 
-# Set the working directory in the container
-WORKDIR /app
+WORKDIR /code
 
-# Copy the requirements file into the container
-COPY requirements.txt .
+COPY ./requirements.txt /code/requirements.txt
 
-# Install the required packages
-RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
-# Copy the rest of the application code into the container
 COPY . .
 
-# Expose the port that FastAPI will run on
-EXPOSE 7860
-
-# Command to run the FastAPI application
-CMD ["uvicorn", "SERVER:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["flask", "run", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,160 @@
+from flask import Flask, request, jsonify, Response
+from functools import wraps
+import uuid
+import json
+import time
+from typing import List, Optional
+from pydantic import BaseModel, ValidationError
+from API_provider import API_Inference
+from core_logic import (
+    check_api_key_validity,
+    update_request_count,
+    get_rate_limit_status,
+    get_subscription_status,
+    get_available_models,
+    get_model_info,
+)
+
+app = Flask(__name__)
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+class ChatCompletionRequest(BaseModel):
+    model: str
+    messages: List[Message]
+    stream: Optional[bool] = False
+    max_tokens: Optional[int] = 4000
+    temperature: Optional[float] = 0.5
+    top_p: Optional[float] = 0.95
+
+def get_api_key():
+    auth_header = request.headers.get('Authorization')
+    if not auth_header or not auth_header.startswith('Bearer '):
+        return None
+    return auth_header.split(' ')[1]
+
+def requires_api_key(func):
+    @wraps(func)
+    def decorated(*args, **kwargs):
+        api_key = get_api_key()
+        if not api_key:
+            return jsonify({'detail': 'Not authenticated'}), 401
+        kwargs['api_key'] = api_key
+        return func(*args, **kwargs)
+    return decorated
+
+@app.route('/v1/chat/completions', methods=['POST'])
+@requires_api_key
+def chat_completions(api_key):
+    try:
+        # Parse and validate request data
+        try:
+            data = request.get_json()
+            chat_request = ChatCompletionRequest(**data)
+        except ValidationError as e:
+            return jsonify({'detail': e.errors()}), 400
+
+        # Check API key validity and rate limit
+        is_valid, error_message = check_api_key_validity(api_key)
+        if not is_valid:
+            return jsonify({'detail': error_message}), 401
+
+        messages = [{"role": msg.role, "content": msg.content} for msg in chat_request.messages]
+
+        # Get model info
+        model_info = get_model_info(chat_request.model)
+        if not model_info:
+            return jsonify({'detail': 'Invalid model specified'}), 400
+
+        # Model mapping
+        model_mapping = {
+            "meta-llama-405b-turbo": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+            "claude-3.5-sonnet": "claude-3-sonnet-20240229",
+        }
+        model_name = model_mapping.get(chat_request.model, chat_request.model)
+        credits_reduction = {
+            "gpt-4o": 1,
+            "claude-3-sonnet-20240229": 1,
+            "gemini-1.5-pro": 1,
+            "gemini-1-5-flash": 1,
+            "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 1,
+            "o1-mini": 2,
+            "o1-preview": 3,
+        }.get(model_name, 0)
+
+        if chat_request.stream:
+            def generate():
+                try:
+                    for chunk in API_Inference(messages, model=model_name, stream=True,
+                                               max_tokens=chat_request.max_tokens,
+                                               temperature=chat_request.temperature,
+                                               top_p=chat_request.top_p):
+                        data = json.dumps({'choices': [{'delta': {'content': chunk}}]})
+                        yield f"data: {data}\n\n"
+                    yield f"data: [DONE]\n\nCredits used: {credits_reduction}\n\n"
+                    update_request_count(api_key, credits_reduction)
+                except Exception as e:
+                    yield f"data: [ERROR] {str(e)}\n\n"
+
+            return Response(generate(), mimetype='text/event-stream')
+        else:
+            response = API_Inference(messages, model=model_name, stream=False,
+                                     max_tokens=chat_request.max_tokens,
+                                     temperature=chat_request.temperature,
+                                     top_p=chat_request.top_p)
+            update_request_count(api_key, credits_reduction)
+            prompt_tokens = sum(len(msg['content'].split()) for msg in messages)
+            completion_tokens = len(response.split())
+            total_tokens = prompt_tokens + completion_tokens
+            return jsonify({
+                "id": f"chatcmpl-{str(uuid.uuid4())}",
+                "object": "chat.completion",
+ "created": int(uuid.uuid1().time // 1e7),
114
+ "model": model_name,
115
+ "choices": [
116
+ {
117
+ "index": 0,
118
+ "message": {
119
+ "role": "assistant",
120
+ "content": response
121
+ },
122
+ "finish_reason": "stop"
123
+ }
124
+ ],
125
+ "usage": {
126
+ "prompt_tokens": prompt_tokens,
127
+ "completion_tokens": completion_tokens,
128
+ "total_tokens": total_tokens
129
+ },
130
+ "credits_used": credits_reduction
131
+ })
132
+ except Exception as e:
133
+ return jsonify({'detail': str(e)}), 500
134
+
135
+ @app.route('/rate_limit/status', methods=['GET'])
136
+ @requires_api_key
137
+ def get_rate_limit_status_endpoint(api_key):
138
+ is_valid, error_message = check_api_key_validity(api_key, check_rate_limit=False)
139
+ if not is_valid:
140
+ return jsonify({'detail': error_message}), 401
141
+ return jsonify(get_rate_limit_status(api_key))
142
+
143
+ @app.route('/subscription/status', methods=['GET'])
144
+ @requires_api_key
145
+ def get_subscription_status_endpoint(api_key):
146
+ is_valid, error_message = check_api_key_validity(api_key, check_rate_limit=False)
147
+ if not is_valid:
148
+ return jsonify({'detail': error_message}), 401
149
+ return jsonify(get_subscription_status(api_key))
150
+
151
+ @app.route('/models', methods=['GET'])
152
+ @requires_api_key
153
+ def get_available_models_endpoint(api_key):
154
+ is_valid, error_message = check_api_key_validity(api_key, check_rate_limit=False)
155
+ if not is_valid:
156
+ return jsonify({'detail': error_message}), 401
157
+ return jsonify({"data": [{"id": model} for model in get_available_models().values()]})
158
+
159
+ if __name__ == "__main__":
160
+ app.run(host="0.0.0.0", port=8000)
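
For reference, a minimal client sketch against the /v1/chat/completions endpoint added above. This is not part of the commit: it assumes the container is reachable on localhost:7860 (the port in the Dockerfile CMD), that the requests package is installed, and that the API key and model id shown are placeholders which core_logic would have to accept.

# Hypothetical client for the endpoints in this commit (not included in the repo).
import json
import requests

BASE_URL = "http://localhost:7860"          # assumption: Dockerfile's flask run port
HEADERS = {"Authorization": "Bearer sk-example"}  # placeholder API key

# Non-streaming request: returns an OpenAI-style chat.completion object.
payload = {
    "model": "gpt-4o",  # placeholder; use an id returned by GET /models
    "messages": [{"role": "user", "content": "Say hello."}],
    "stream": False,
}
r = requests.post(f"{BASE_URL}/v1/chat/completions", json=payload, headers=HEADERS)
r.raise_for_status()
print(r.json()["choices"][0]["message"]["content"])

# Streaming request: the endpoint emits SSE-style "data: ..." lines.
payload["stream"] = True
with requests.post(f"{BASE_URL}/v1/chat/completions", json=payload,
                   headers=HEADERS, stream=True) as r:
    for line in r.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skips blanks and the trailing "Credits used" line
        body = line[len("data: "):]
        if body.startswith("["):  # "[DONE]" / "[ERROR] ..." frames are not JSON
            continue
        print(json.loads(body)["choices"][0]["delta"]["content"], end="", flush=True)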