ethiotech4848 committed
Commit 7b7f6a9 · verified · 1 Parent(s): b1dd449

Create query.md

Files changed (1)
  1. query.md +384 -0
query.md ADDED
@@ -0,0 +1,384 @@
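`query.md` bundles two related scripts: a standalone Python client for the TypeGPT chat-completions endpoint, and a FastAPI app that re-exposes that client as a local OpenAI-compatible API.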
```python
import requests
import json

# Build model mapping
original_models = [
    # OpenAI Models
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-202201",
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "o1-preview",

    # Claude Models
    "claude",
    "claude-3-5-sonnet",
    "claude-sonnet-3.5",
    "claude-3-5-sonnet-20240620",

    # Meta/LLaMA Models
    "@cf/meta/llama-2-7b-chat-fp16",
    "@cf/meta/llama-2-7b-chat-int8",
    "@cf/meta/llama-3-8b-instruct",
    "@cf/meta/llama-3.1-8b-instruct",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
    "llama-3.1-405b",
    "llama-3.1-70b",
    "llama-3.1-8b",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

    # Mistral Models
    "mistral",
    "mistral-large",
    "@cf/mistral/mistral-7b-instruct-v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
    "@hf/mistralai/mistral-7b-instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    # Qwen Models
    "@cf/qwen/qwen1.5-0.5b-chat",
    "@cf/qwen/qwen1.5-1.8b-chat",
    "@cf/qwen/qwen1.5-7b-chat-awq",
    "@cf/qwen/qwen1.5-14b-chat-awq",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",

    # Google/Gemini Models
    "@cf/google/gemma-2b-it-lora",
    "@cf/google/gemma-7b-it-lora",
    "@hf/google/gemma-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-1.1-7b-it",
    "gemini-pro",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",

    # Cohere Models
    "c4ai-aya-23-35b",
    "c4ai-aya-23-8b",
    "command",
    "command-light",
    "command-light-nightly",
    "command-nightly",
    "command-r",
    "command-r-08-2024",
    "command-r-plus",
    "command-r-plus-08-2024",
    "rerank-english-v2.0",
    "rerank-english-v3.0",
    "rerank-multilingual-v2.0",
    "rerank-multilingual-v3.0",

    # Microsoft Models
    "@cf/microsoft/phi-2",
    "microsoft/DialoGPT-medium",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/WizardLM-2-8x22B",

    # Yi Models
    "01-ai/Yi-1.5-34B-Chat",
    "01-ai/Yi-34B-Chat",
]

# Create mapping from simplified model names to original model names.
# The simplified name is the last path segment, e.g.
# "@cf/meta/llama-3-8b-instruct" -> "llama-3-8b-instruct".
model_mapping = {}
simplified_models = []

for original_model in original_models:
    simplified_name = original_model.split('/')[-1]
    if simplified_name in model_mapping:
        # Conflict: two upstream IDs collapse to the same short name.
        # Keep the first one and exclude the later duplicate.
        print(f"Conflict detected for model name '{simplified_name}'. "
              f"Excluding '{original_model}' from available models.")
        continue
    model_mapping[simplified_name] = original_model
    simplified_models.append(simplified_name)


def generate(
    model,
    messages,
    temperature=0.7,
    top_p=1.0,
    n=1,  # accepted for OpenAI-API compatibility; not forwarded upstream
    stream=False,
    stop=None,
    max_tokens=None,
    presence_penalty=0.0,
    frequency_penalty=0.0,
    logit_bias=None,
    user=None,
    timeout=30,
):
    """
    Generate a chat completion using the provided model and messages.

    Returns the parsed JSON response, or a generator of parsed SSE
    chunks when stream=True.
    """
    if model not in simplified_models:
        raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(simplified_models)}")

    # Map the simplified model name back to the original upstream name
    original_model = model_mapping[model]

    api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"

    headers = {
        "authority": "chat.typegpt.net",
        "accept": "application/json, text/event-stream",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/json",
        "origin": "https://chat.typegpt.net",
        "referer": "https://chat.typegpt.net/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    }

    # Payload follows the OpenAI chat-completions schema
    payload = {
        "messages": messages,
        "stream": stream,
        "model": original_model,
        "temperature": temperature,
        "presence_penalty": presence_penalty,
        "frequency_penalty": frequency_penalty,
        "top_p": top_p,
    }

    # Optional parameters are only sent when explicitly provided
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens
    if stop is not None:
        payload["stop"] = stop
    if logit_bias is not None:
        payload["logit_bias"] = logit_bias
    if user is not None:
        payload["user"] = user

    # Start the request
    session = requests.Session()
    response = session.post(
        api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
    )

    if not response.ok:
        raise Exception(
            f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
        )

    def stream_response():
        # Parse the Server-Sent Events stream line by line
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
                if line.startswith("data: "):
                    line = line[6:]  # Remove "data: " prefix
                    if line.strip() == "[DONE]":
                        break
                    try:
                        yield json.loads(line)
                    except json.JSONDecodeError:
                        continue

    if stream:
        return stream_response()
    else:
        return response.json()


if __name__ == "__main__":
    # Example usage
    # model = "claude-3-5-sonnet-20240620"
    # model = "qwen1.5-0.5b-chat"
    # model = "llama-2-7b-chat-fp16"
    model = "gpt-3.5-turbo"
    messages = [
        {"role": "system", "content": "Be Detailed"},
        {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."},
    ]

    # try:
    #     # For non-streamed response
    #     response = generate(
    #         model=model,
    #         messages=messages,
    #         temperature=0.5,
    #         max_tokens=4000,
    #         stream=False,  # Change to True for streaming
    #     )
    #     if 'choices' in response:
    #         reply = response['choices'][0]['message']['content']
    #         print(reply)
    #     else:
    #         print("No response received.")
    # except Exception as e:
    #     print(e)

    try:
        # For streamed response
        response = generate(
            model=model,
            messages=messages,
            temperature=0.5,
            max_tokens=4000,
            stream=True,  # Change to False for a non-streamed response
        )
        for data in response:
            if 'choices' in data:
                # Delta chunks may omit 'content' (e.g. the role-only first
                # chunk), so fall back to an empty string.
                reply = data['choices'][0]['delta'].get('content') or ""
                print(reply, end="", flush=True)
        print()
    except Exception as e:
        print(e)
```
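
Since the mapping is built at import time, the short names that `generate` accepts can be inspected directly. A minimal sketch, assuming the script above is importable as `TYPEGPT.typegpt_api` (the module path the server below uses):

```python
from TYPEGPT.typegpt_api import model_mapping, simplified_models

# Each short name maps back to its original upstream model ID.
for name in simplified_models[:5]:
    print(f"{name:40s} -> {model_mapping[name]}")
```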
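
The second script wraps the client in a FastAPI app that forwards `/models` upstream and translates `/chat/completions` requests into `generate` calls; it pulls developer metadata from a local `api_info` module.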
```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import json
import requests

from TYPEGPT.typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info

app = FastAPI()

# Set up CORS middleware if needed
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/health_check")
async def health_check():
    return {"status": "OK"}


@app.get("/models")
async def get_models():
    # Retrieve models from the TypeGPT API and forward the response
    api_endpoint = "https://chat.typegpt.net/api/openai/v1/models"
    try:
        response = requests.get(api_endpoint, timeout=30)
        return JSONResponse(content=response.json(), status_code=response.status_code)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)


@app.post("/chat/completions")
async def chat_completions(request: Request):
    # Receive the JSON payload
    try:
        body = await request.json()
    except Exception:
        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
    if not messages:
        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)

    # Call the generate function
    try:
        if stream:
            def generate_stream():
                # `generate` is blocking; a plain (sync) generator lets
                # StreamingResponse iterate it in a threadpool.
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )
                # Re-emit each parsed chunk in SSE format
                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(
                generate_stream(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                },
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)


@app.get("/developer_info")
async def get_developer_info():
    return JSONResponse(content=developer_info)


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
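
Once running, the proxy can be exercised like any OpenAI-style endpoint. A minimal sketch, assuming the server is up on `localhost:8000` as configured above and the upstream service accepts the request:

```python
import requests

# Hypothetical local test of the proxy above; the route and payload
# mirror the OpenAI chat-completions schema the server expects.
resp = requests.post(
    "http://localhost:8000/chat/completions",
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "stream": False,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```

Sending `"stream": true` instead yields the SSE stream re-emitted by `generate_stream`; iterate `resp.iter_lines()` to consume it.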