tastypear committed on
Commit
0f3b6b0
1 Parent(s): f38a522

add models list route

Browse files
Files changed (1) hide show
  1. main.py +156 -132
main.py CHANGED
@@ -1,132 +1,156 @@
1
- import gevent.pywsgi
2
- from gevent import monkey;monkey.patch_all()
3
- from flask import Flask, request, Response
4
- import argparse
5
- import requests
6
- import random
7
- import string
8
- import time
9
- import json
10
- import os
11
-
12
- app = Flask(__name__)
13
-
14
- parser = argparse.ArgumentParser(description="An example of Qwen demo with a similar API to OAI.")
15
- parser.add_argument("--host", type=str, help="Set the ip address.(default: 0.0.0.0)", default='0.0.0.0')
16
- parser.add_argument("--port", type=int, help="Set the port.(default: 7860)", default=7860)
17
- args = parser.parse_args()
18
-
19
- base_url = os.getenv('MODEL_BASE_URL')
20
-
21
- @app.route("/", methods=["GET"])
22
- def index():
23
- return Response(f'QW1_5 OpenAI Compatible API<br><br>'+
24
- f'Set "{os.getenv("SPACE_URL")}/api" as proxy (or API Domain) in your Chatbot.<br><br>'+
25
- f'The complete API is: {os.getenv("SPACE_URL")}/api/v1/chat/completions')
26
-
27
- @app.route("/api/v1/chat/completions", methods=["POST", "OPTIONS"])
28
- @app.route("/v1/chat/completions", methods=["POST", "OPTIONS"])
29
- def chat_completions():
30
-
31
- if request.method == "OPTIONS":
32
- return Response(
33
- headers={
34
- "Access-Control-Allow-Origin": "*",
35
- "Access-Control-Allow-Headers": "*",
36
- }
37
- )
38
-
39
- data = request.get_json()
40
-
41
- # reorganize data
42
- system = "You are a helpful assistant."
43
- chat_history = []
44
- prompt = ""
45
-
46
- if "messages" in data:
47
- messages = data["messages"]
48
- message_size = len(messages)
49
-
50
- prompt = messages[-1].get("content")
51
- for i in range(message_size - 1):
52
- role_this = messages[i].get("role")
53
- role_next = messages[i + 1].get("role")
54
- if role_this == "system":
55
- system = messages[i].get("content")
56
- elif role_this == "user":
57
- if role_next == "assistant":
58
- chat_history.append(
59
- [messages[i].get("content"), messages[i + 1].get("content")]
60
- )
61
- else:
62
- chat_history.append([messages[i].get("content"), " "])
63
-
64
- # print(f'{system = }')
65
- # print(f'{chat_history = }')
66
- # print(f'{prompt = }')
67
-
68
- fn_index = 0
69
-
70
- # gen a random char(11) hash
71
- chars = string.ascii_lowercase + string.digits
72
- session_hash = "".join(random.choice(chars) for _ in range(11))
73
-
74
- json_prompt = {
75
- "data": [prompt, chat_history, system],
76
- "fn_index": fn_index,
77
- "session_hash": session_hash,
78
- }
79
-
80
- def generate():
81
- response = requests.post(f"{base_url}/queue/join", json=json_prompt)
82
- url = f"{base_url}/queue/data?session_hash={session_hash}"
83
- data = requests.get(url, stream=True)
84
-
85
- time_now = int(time.time())
86
-
87
- for line in data.iter_lines():
88
- if line:
89
- decoded_line = line.decode("utf-8")
90
- json_line = json.loads(decoded_line[6:])
91
- if json_line["msg"] == "process_starts":
92
- res_data = gen_res_data({}, time_now=time_now, start=True)
93
- yield f"data: {json.dumps(res_data)}\n\n"
94
- elif json_line["msg"] == "process_generating":
95
- res_data = gen_res_data(json_line, time_now=time_now)
96
- yield f"data: {json.dumps(res_data)}\n\n"
97
- elif json_line["msg"] == "process_completed":
98
- yield "data: [DONE]"
99
-
100
- return Response(
101
- generate(),
102
- mimetype="text/event-stream",
103
- headers={
104
- "Access-Control-Allow-Origin": "*",
105
- "Access-Control-Allow-Headers": "*",
106
- },
107
- )
108
-
109
-
110
- def gen_res_data(data, time_now=0, start=False):
111
- res_data = {
112
- "id": "chatcmpl",
113
- "object": "chat.completion.chunk",
114
- "created": time_now,
115
- "model": "qwen1_5",
116
- "choices": [{"index": 0, "finish_reason": None}],
117
- }
118
-
119
- if start:
120
- res_data["choices"][0]["delta"] = {"role": "assistant", "content": ""}
121
- else:
122
- chat_pair = data["output"]["data"][1]
123
- if chat_pair == []:
124
- res_data["choices"][0]["finish_reason"] = "stop"
125
- else:
126
- res_data["choices"][0]["delta"] = {"content": chat_pair[-1][-1]}
127
- return res_data
128
-
129
-
130
- if __name__ == "__main__":
131
- # app.run(host=args.host, port=args.port, debug=True)
132
- gevent.pywsgi.WSGIServer((args.host, args.port), app).serve_forever()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gevent.pywsgi
# Monkey-patch the stdlib so blocking I/O (requests, sockets) cooperates with
# gevent's event loop.  NOTE(review): gevent recommends patching before any
# other imports — confirm gevent.pywsgi being imported first is intentional.
from gevent import monkey;monkey.patch_all()
from flask import Flask, request, Response, jsonify
import argparse
import requests
import random
import string
import time
import json
import os

app = Flask(__name__)
# Keep insertion order in jsonify() output so responses mirror the
# OpenAI API payload layout instead of being alphabetized.
app.json.sort_keys = False

# Command-line options for the embedded WSGI server.
parser = argparse.ArgumentParser(description="An example of Qwen demo with a similar API to OAI.")
parser.add_argument("--host", type=str, help="Set the ip address.(default: 0.0.0.0)", default='0.0.0.0')
parser.add_argument("--port", type=int, help="Set the port.(default: 7860)", default=7860)
args = parser.parse_args()

# Upstream Gradio space that actually runs the model.
# NOTE(review): if MODEL_BASE_URL is unset this is None and every request
# will build URLs like "None/queue/join" — verify deployment env.
base_url = os.getenv('MODEL_BASE_URL')
21
+
22
@app.route('/api/v1/models', methods=["GET", "POST"])
@app.route('/v1/models', methods=["GET", "POST"])
def model_list():
    """Return an OpenAI-style model listing.

    Advertises two model ids ("qwen" and "gpt-3.5-turbo") so OpenAI
    clients that probe /v1/models before chatting accept this endpoint.
    """
    created = int(time.time())
    entries = [
        {
            "id": model_id,
            "object": "model",
            "created": created,
            "owned_by": "tastypear",
        }
        for model_id in ("qwen", "gpt-3.5-turbo")
    ]
    return jsonify({"object": "list", "data": entries})
44
+
45
+ @app.route("/", methods=["GET"])
46
+ def index():
47
+ return Response(f'QW1_5 OpenAI Compatible API<br><br>'+
48
+ f'Set "{os.getenv("SPACE_URL")}/api" as proxy (or API Domain) in your Chatbot.<br><br>'+
49
+ f'The complete API is: {os.getenv("SPACE_URL")}/api/v1/chat/completions')
50
+
51
+ @app.route("/api/v1/chat/completions", methods=["POST", "OPTIONS"])
52
+ @app.route("/v1/chat/completions", methods=["POST", "OPTIONS"])
53
+ def chat_completions():
54
+
55
+ if request.method == "OPTIONS":
56
+ return Response(
57
+ headers={
58
+ "Access-Control-Allow-Origin": "*",
59
+ "Access-Control-Allow-Headers": "*",
60
+ }
61
+ )
62
+
63
+ data = request.get_json()
64
+
65
+ # reorganize data
66
+ system = "You are a helpful assistant."
67
+ chat_history = []
68
+ prompt = ""
69
+
70
+ if "messages" in data:
71
+ messages = data["messages"]
72
+ message_size = len(messages)
73
+
74
+ prompt = messages[-1].get("content")
75
+ for i in range(message_size - 1):
76
+ role_this = messages[i].get("role")
77
+ role_next = messages[i + 1].get("role")
78
+ if role_this == "system":
79
+ system = messages[i].get("content")
80
+ elif role_this == "user":
81
+ if role_next == "assistant":
82
+ chat_history.append(
83
+ [messages[i].get("content"), messages[i + 1].get("content")]
84
+ )
85
+ else:
86
+ chat_history.append([messages[i].get("content"), " "])
87
+
88
+ # print(f'{system = }')
89
+ # print(f'{chat_history = }')
90
+ # print(f'{prompt = }')
91
+
92
+ fn_index = 0
93
+
94
+ # gen a random char(11) hash
95
+ chars = string.ascii_lowercase + string.digits
96
+ session_hash = "".join(random.choice(chars) for _ in range(11))
97
+
98
+ json_prompt = {
99
+ "data": [prompt, chat_history, system],
100
+ "fn_index": fn_index,
101
+ "session_hash": session_hash,
102
+ }
103
+
104
+ def generate():
105
+ response = requests.post(f"{base_url}/queue/join", json=json_prompt)
106
+ url = f"{base_url}/queue/data?session_hash={session_hash}"
107
+ data = requests.get(url, stream=True)
108
+
109
+ time_now = int(time.time())
110
+
111
+ for line in data.iter_lines():
112
+ if line:
113
+ decoded_line = line.decode("utf-8")
114
+ json_line = json.loads(decoded_line[6:])
115
+ if json_line["msg"] == "process_starts":
116
+ res_data = gen_res_data({}, time_now=time_now, start=True)
117
+ yield f"data: {json.dumps(res_data)}\n\n"
118
+ elif json_line["msg"] == "process_generating":
119
+ res_data = gen_res_data(json_line, time_now=time_now)
120
+ yield f"data: {json.dumps(res_data)}\n\n"
121
+ elif json_line["msg"] == "process_completed":
122
+ yield "data: [DONE]"
123
+
124
+ return Response(
125
+ generate(),
126
+ mimetype="text/event-stream",
127
+ headers={
128
+ "Access-Control-Allow-Origin": "*",
129
+ "Access-Control-Allow-Headers": "*",
130
+ },
131
+ )
132
+
133
+
134
def gen_res_data(data, time_now=0, start=False):
    """Build one OpenAI chat.completion.chunk payload from a Gradio event.

    data: a Gradio "process_generating" event dict (ignored when start=True).
    time_now: unix timestamp stamped into the chunk's "created" field.
    start: True for the initial chunk that announces the assistant role.
    """
    chunk = {
        "id": "chatcmpl",
        "object": "chat.completion.chunk",
        "created": time_now,
        "model": "qwen1_5",
        "choices": [{"index": 0, "finish_reason": None}],
    }
    choice = chunk["choices"][0]

    if start:
        # Opening chunk: role announcement with empty content.
        choice["delta"] = {"role": "assistant", "content": ""}
        return chunk

    chat_pair = data["output"]["data"][1]
    if chat_pair == []:
        # Empty history signals the upstream run has finished.
        choice["finish_reason"] = "stop"
    else:
        # Latest assistant text is the last element of the last pair.
        choice["delta"] = {"content": chat_pair[-1][-1]}
    return chunk
152
+
153
+
154
+ if __name__ == "__main__":
155
+ # app.run(host=args.host, port=args.port, debug=True)
156
+ gevent.pywsgi.WSGIServer((args.host, args.port), app).serve_forever()