DevsDoCode committed
Commit cae172a · verified · 1 Parent(s): c002f09

Update app.py

Files changed (1): app.py (+86 -90)
app.py CHANGED

@@ -4,7 +4,6 @@ import uuid
 import json
 from typing import List, Optional
 from pydantic import BaseModel, ValidationError
-import logging
 from API_provider import API_Inference
 from core_logic import (
     check_api_key_validity,
@@ -16,7 +15,6 @@ from core_logic import (
 )
 
 app = Flask(__name__)
-logging.basicConfig(level=logging.DEBUG)
 
 class Message(BaseModel):
     role: str
@@ -53,94 +51,92 @@ def index():
 @app.route('/chat/completions', methods=['POST', 'GET'])
 @requires_api_key
 def chat_completions(api_key):
-    return jsonify({'detail': "YOUUUUUUUUUU"}), 500
-    # logging.info("Received request for chat completions")
-    # print("requess received")
-    # try:
-    #     logging.info("Received request for chat completions")
-    #     # Parse and validate request data
-    #     try:
-    #         data = request.get_json()
-    #         chat_request = ChatCompletionRequest(**data)
-    #     except ValidationError as e:
-    #         return jsonify({'detail': e.errors()}), 400
-
-    #     # Check API key validity and rate limit
-    #     is_valid, error_message = check_api_key_validity(api_key)
-    #     if not is_valid:
-    #         return jsonify({'detail': error_message}), 401
-
-    #     messages = [{"role": msg.role, "content": msg.content} for msg in chat_request.messages]
-
-    #     # Get model info
-    #     model_info = get_model_info(chat_request.model)
-    #     if not model_info:
-    #         return jsonify({'detail': 'Invalid model specified'}), 400
-
-    #     # Model mapping
-    #     model_mapping = {
-    #         "meta-llama-405b-turbo": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
-    #         "claude-3.5-sonnet": "claude-3-sonnet-20240229",
-    #     }
-    #     model_name = model_mapping.get(chat_request.model, chat_request.model)
-    #     credits_reduction = {
-    #         "gpt-4o": 1,
-    #         "claude-3-sonnet-20240229": 1,
-    #         "gemini-1.5-pro": 1,
-    #         "gemini-1-5-flash": 1,
-    #         "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 1,
-    #         "o1-mini": 2,
-    #         "o1-preview": 3,
-    #     }.get(model_name, 0)
-
-    #     if chat_request.stream:
-    #         def generate():
-    #             try:
-    #                 for chunk in API_Inference(messages, model=model_name, stream=True,
-    #                                            max_tokens=chat_request.max_tokens,
-    #                                            temperature=chat_request.temperature,
-    #                                            top_p=chat_request.top_p):
-    #                     data = json.dumps({'choices': [{'delta': {'content': chunk}}]})
-    #                     yield f"data: {data}\n\n"
-    #                 yield f"data: [DONE]\n\nCredits used: {credits_reduction}\n\n"
-    #                 update_request_count(api_key, credits_reduction)
-    #             except Exception as e:
-    #                 yield f"data: [ERROR] {str(e)}\n\n"
-
-    #         return Response(generate(), mimetype='text/event-stream')
-    #     else:
-    #         response = API_Inference(messages, model=model_name, stream=False,
-    #                                  max_tokens=chat_request.max_tokens,
-    #                                  temperature=chat_request.temperature,
-    #                                  top_p=chat_request.top_p)
-    #         update_request_count(api_key, credits_reduction)
-    #         prompt_tokens = sum(len(msg['content'].split()) for msg in messages)
-    #         completion_tokens = len(response.split())
-    #         total_tokens = prompt_tokens + completion_tokens
-    #         return jsonify({
-    #             "id": f"chatcmpl-{str(uuid.uuid4())}",
-    #             "object": "chat.completion",
-    #             "created": int(uuid.uuid1().time // 1e7),
-    #             "model": model_name,
-    #             "choices": [
-    #                 {
-    #                     "index": 0,
-    #                     "message": {
-    #                         "role": "assistant",
-    #                         "content": response
-    #                     },
-    #                     "finish_reason": "stop"
-    #                 }
-    #             ],
-    #             "usage": {
-    #                 "prompt_tokens": prompt_tokens,
-    #                 "completion_tokens": completion_tokens,
-    #                 "total_tokens": total_tokens
-    #             },
-    #             "credits_used": credits_reduction
-    #         })
-    # except Exception as e:
-    #     return jsonify({'detail': str(e)}), 500
+    print("requess received")
+    try:
+        logging.info("Received request for chat completions")
+        # Parse and validate request data
+        try:
+            data = request.get_json()
+            chat_request = ChatCompletionRequest(**data)
+        except ValidationError as e:
+            return jsonify({'detail': e.errors()}), 400
+
+        # Check API key validity and rate limit
+        is_valid, error_message = check_api_key_validity(api_key)
+        if not is_valid:
+            return jsonify({'detail': error_message}), 401
+
+        messages = [{"role": msg.role, "content": msg.content} for msg in chat_request.messages]
+
+        # Get model info
+        model_info = get_model_info(chat_request.model)
+        if not model_info:
+            return jsonify({'detail': 'Invalid model specified'}), 400
+
+        # Model mapping
+        model_mapping = {
+            "meta-llama-405b-turbo": "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
+            "claude-3.5-sonnet": "claude-3-sonnet-20240229",
+        }
+        model_name = model_mapping.get(chat_request.model, chat_request.model)
+        credits_reduction = {
+            "gpt-4o": 1,
+            "claude-3-sonnet-20240229": 1,
+            "gemini-1.5-pro": 1,
+            "gemini-1-5-flash": 1,
+            "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": 1,
+            "o1-mini": 2,
+            "o1-preview": 3,
+        }.get(model_name, 0)
+
+        if chat_request.stream:
+            def generate():
+                try:
+                    for chunk in API_Inference(messages, model=model_name, stream=True,
+                                               max_tokens=chat_request.max_tokens,
+                                               temperature=chat_request.temperature,
+                                               top_p=chat_request.top_p):
+                        data = json.dumps({'choices': [{'delta': {'content': chunk}}]})
+                        yield f"data: {data}\n\n"
+                    yield f"data: [DONE]\n\nCredits used: {credits_reduction}\n\n"
+                    update_request_count(api_key, credits_reduction)
+                except Exception as e:
+                    yield f"data: [ERROR] {str(e)}\n\n"
+
+            return Response(generate(), mimetype='text/event-stream')
+        else:
+            response = API_Inference(messages, model=model_name, stream=False,
+                                     max_tokens=chat_request.max_tokens,
+                                     temperature=chat_request.temperature,
+                                     top_p=chat_request.top_p)
+            update_request_count(api_key, credits_reduction)
+            prompt_tokens = sum(len(msg['content'].split()) for msg in messages)
+            completion_tokens = len(response.split())
+            total_tokens = prompt_tokens + completion_tokens
+            return jsonify({
+                "id": f"chatcmpl-{str(uuid.uuid4())}",
+                "object": "chat.completion",
+                "created": int(uuid.uuid1().time // 1e7),
+                "model": model_name,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": response
+                        },
+                        "finish_reason": "stop"
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens
+                },
+                "credits_used": credits_reduction
+            })
+    except Exception as e:
+        return jsonify({'detail': str(e)}), 500
 
 @app.route('/rate_limit/status', methods=['GET'])
 @requires_api_key
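
What the commit does: it deletes the placeholder return jsonify({'detail': "YOUUUUUUUUUU"}), 500 and restores the commented-out handler body, while also dropping import logging and logging.basicConfig(level=logging.DEBUG). One caveat visible in the diff itself: the restored body still calls logging.info("Received request for chat completions"), and with the import gone that call raises a NameError inside the outer try, which the final except turns into a 500 response for every request until the import comes back or the call is removed.

A minimal client sketch for the non-streaming path follows. The transport details are assumptions: the diff shows neither the host/port the app runs on (Flask's dev-server default localhost:5000 is used here) nor which header requires_api_key reads (an Authorization bearer header is a guess).

    import requests

    BASE_URL = "http://localhost:5000"   # assumption: Flask dev-server default
    API_KEY = "your-api-key"             # placeholder

    payload = {
        "model": "claude-3.5-sonnet",    # alias; the handler remaps it to claude-3-sonnet-20240229
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": False,
        "max_tokens": 256,
        "temperature": 0.7,
        "top_p": 0.9,
    }

    # Assumption: the key travels in an Authorization header; the diff does not
    # show how requires_api_key extracts it.
    resp = requests.post(f"{BASE_URL}/chat/completions", json=payload,
                         headers={"Authorization": f"Bearer {API_KEY}"}, timeout=60)
    resp.raise_for_status()  # will raise until the missing logging import is fixed
    body = resp.json()
    print(body["choices"][0]["message"]["content"])
    print("credits used:", body["credits_used"])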
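
For stream=True, the handler emits server-sent events: each chunk arrives as a "data:" line whose JSON payload is {'choices': [{'delta': {'content': ...}}]}, and the stream ends with a nonstandard "data: [DONE]" event followed by a bare "Credits used: N" trailer line rather than an OpenAI-style usage object. A sketch of consuming that stream, under the same assumed host and auth header as above:

    import json
    import requests

    payload = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": "Stream a short poem."}],
        "stream": True,
    }

    with requests.post("http://localhost:5000/chat/completions",          # assumed host/port
                       json=payload,
                       headers={"Authorization": "Bearer your-api-key"},  # assumed header
                       stream=True, timeout=60) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line or not line.startswith("data: "):
                continue  # skips blank separators and the "Credits used: N" trailer
            data = line[len("data: "):]
            if data.startswith("[DONE]") or data.startswith("[ERROR]"):
                break
            chunk = json.loads(data)
            print(chunk["choices"][0]["delta"]["content"], end="", flush=True)
    print()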