AIMaster7 committed
Commit 348c3b8 · verified · Parent: b5ad5b3

Update main.py

Files changed (1)
  1. main.py +128 -172
main.py CHANGED
@@ -4,7 +4,7 @@ import os
 import secrets
 import string
 import time
-from typing import List, Optional, Union, Any, Literal
+from typing import List, Optional, Union, Any
 import httpx
 from dotenv import load_dotenv
 from fastapi import FastAPI
@@ -52,20 +52,9 @@ async def list_models():
     return {"object": "list", "data": AVAILABLE_MODELS}

 # === Chat Completion ===
-class FunctionCall(BaseModel):
-    name: str
-    arguments: str
-
-class ToolCall(BaseModel):
-    id: str
-    type: Literal["function"] = "function"
-    function: FunctionCall
-
 class Message(BaseModel):
     role: str
-    content: Optional[str] = None
-    tool_calls: Optional[List[ToolCall]] = None
-    name: Optional[str] = None
+    content: str

 class ChatRequest(BaseModel):
     messages: List[Message]
@@ -87,222 +76,189 @@ async def chat_completion(request: ChatRequest):
         'referer': 'https://www.chatwithmono.xyz/',
         'user-agent': 'Mozilla/5.0',
     }
-
     if request.tools:
-        tool_prompt = """You have access to tools. To call a tool, respond with JSON within <tool_call><tool_call> XML tags.
-Format: <tool_call>{"name":<name>,"parameters":{...}}</tool_call>"""
-        if request.messages and request.messages[0].role == "system":
+        # Handle tools by injecting them into the system prompt.
+        # Tool calls must be encoded in <tool_call></tool_call> XML tags.
+        tool_prompt = f"""You have access to the following tools. To call a tool, respond with JSON for a tool call within <tool_call></tool_call> XML tags. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
+Tools:
+{";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
+
+Response format for a tool call:
+For each function call, return a JSON object with the function name and parameters within <tool_call></tool_call> XML tags:
+<tool_call>
+{{"name": <function-name>, "parameters": <args-json-object>}}
+</tool_call>
+
+Example of tool calling:
+<tool_call>
+{{"name": "get_weather", "parameters": {{"city": "New York"}}}}
+</tool_call>
+
+Using tools is recommended.
+"""
+        if request.messages[0].role == "system":
             request.messages[0].content += "\n\n" + tool_prompt
         else:
-            request.messages.insert(0, Message(role="system", content=tool_prompt))
-
+            request.messages.insert(0, {"role": "system", "content": tool_prompt})
     request_data = request.model_dump(exclude_unset=True)
+
     payload = {
         "messages": request_data["messages"],
         "model": model_id
     }
-
     if request.stream:
         async def event_stream():
             created = int(time.time())
             is_first_chunk = True
             usage_info = None
-            tool_call_buffer = ""
-            in_tool_call = False
-
+            is_tool_call = False
+            chunks_buffer = []
+            max_initial_chunks = 4  # Number of initial chunks to buffer
             try:
                 async with httpx.AsyncClient(timeout=120) as client:
-                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat",
-                                             headers=headers, json=payload) as response:
+                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
                         response.raise_for_status()
                         async for line in response.aiter_lines():
-                            if not line:
-                                continue
-
+                            if not line: continue
                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
-
-                                    # Check for tool call tags
-                                    if not in_tool_call and "<tool_call>" in content_piece:
-                                        in_tool_call = True
-                                        tool_call_buffer = ""
-
-                                    if in_tool_call:
-                                        tool_call_buffer += content_piece
-                                        if "</tool_call>" in tool_call_buffer:
-                                            # Process complete tool call
-                                            try:
-                                                # Extract tool call content
-                                                start_idx = tool_call_buffer.find("<tool_call>") + len("<tool_call>")
-                                                end_idx = tool_call_buffer.find("</tool_call>")
-                                                tool_call_str = tool_call_buffer[start_idx:end_idx].strip()
-
-                                                tool_call_json = json.loads(tool_call_str)
-                                                delta = {
-                                                    "content": None,
-                                                    "tool_calls": [{
-                                                        "index": 0,
-                                                        "id": generate_random_id("call_"),
-                                                        "type": "function",
-                                                        "function": {
-                                                            "name": tool_call_json["name"],
-                                                            "arguments": json.dumps(tool_call_json["parameters"])
-                                                        }
-                                                    }]
-                                                }
-                                                chunk_data = {
-                                                    "id": chat_id,
-                                                    "object": "chat.completion.chunk",
-                                                    "created": created,
-                                                    "model": model_id,
-                                                    "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                    "usage": None
-                                                }
-                                                yield f"data: {json.dumps(chunk_data)}\n\n"
-                                                in_tool_call = False
-                                                tool_call_buffer = ""
-                                            except (json.JSONDecodeError, KeyError):
-                                                # Fallback to regular content if parsing fails
-                                                in_tool_call = False
-                                                tool_call_buffer = ""
+                                    print(content_piece)
+                                    # Buffer the first few chunks
+                                    if len(chunks_buffer) < max_initial_chunks:
+                                        chunks_buffer.append(content_piece)
+                                        continue
+                                    # Process the buffered chunks if we haven't already
+                                    if chunks_buffer and not is_tool_call:
+                                        full_buffer = ''.join(chunks_buffer)
+                                        if "<tool_call>" in full_buffer:
+                                            print("Tool call detected")
+                                            is_tool_call = True
+
+                                    # Process the current chunk
+                                    if is_tool_call:
+                                        chunks_buffer.append(content_piece)
+
+                                        full_buffer = ''.join(chunks_buffer)
+
+                                        if "</tool_call>" in full_buffer:
+                                            print("Tool call End detected")
+                                            # Process the tool call in the current chunk
+                                            tool_call_str = full_buffer.split("<tool_call>")[1].split("</tool_call>")[0]
+                                            tool_call_json = json.loads(tool_call_str.strip())
+                                            delta = {
+                                                "content": None,
+                                                "tool_calls": [{
+                                                    "index": 0,
+                                                    "id": generate_random_id("call_"),
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": tool_call_json["name"],
+                                                        "arguments": json.dumps(tool_call_json["parameters"])
+                                                    }
+                                                }]
+                                            }
+                                            chunk_data = {
+                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                                "model": model_id,
+                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                "usage": None
+                                            }
+                                            yield f"data: {json.dumps(chunk_data)}\n\n"
                                         else:
-                                            # Still building tool call - skip sending this chunk
                                             continue
                                     else:
+
                                         # Regular content
-                                        delta = {"content": content_piece}
                                         if is_first_chunk:
+                                            delta = {"content": "".join(chunks_buffer), "tool_calls": None}
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
+                                            chunk_data = {
+                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                                "model": model_id,
+                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                "usage": None
+                                            }
+                                            yield f"data: {json.dumps(chunk_data)}\n\n"
+
+                                        delta = {"content": content_piece, "tool_calls": None}
+
                                         chunk_data = {
-                                            "id": chat_id,
-                                            "object": "chat.completion.chunk",
-                                            "created": created,
+                                            "id": chat_id, "object": "chat.completion.chunk", "created": created,
                                             "model": model_id,
                                             "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
                                             "usage": None
                                         }
                                         yield f"data: {json.dumps(chunk_data)}\n\n"
-
-                                except json.JSONDecodeError:
-                                    continue
-
+                                except json.JSONDecodeError: continue
                             elif line.startswith(("e:", "d:")):
                                 try:
                                     usage_info = json.loads(line[2:]).get("usage")
-                                except (json.JSONDecodeError, AttributeError):
-                                    pass
+                                except (json.JSONDecodeError, AttributeError): pass
                                 break
-
-                # Final chunk
+
+                final_usage = None
+                if usage_info:
+                    prompt_tokens = usage_info.get("promptTokens", 0)
+                    completion_tokens = usage_info.get("completionTokens", 0)
+                    final_usage = {
+                        "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens,
+                        "total_tokens": prompt_tokens + completion_tokens,
+                    }
                 done_chunk = {
-                    "id": chat_id,
-                    "object": "chat.completion.chunk",
-                    "created": created,
-                    "model": model_id,
+                    "id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
                     "choices": [{
                         "index": 0,
-                        "delta": {},
+                        "delta": {"role": "assistant", "content": None, "function_call": None, "tool_calls": None},
                         "finish_reason": "stop"
                     }],
-                    "usage": usage_info
+                    "usage": final_usage
                 }
                 yield f"data: {json.dumps(done_chunk)}\n\n"
-                yield "data: [DONE]\n\n"
-
             except httpx.HTTPStatusError as e:
                 error_content = {
                     "error": {
-                        "message": f"Upstream API error: {e.response.status_code}",
-                        "type": "upstream_error",
-                        "code": str(e.response.status_code)
+                        "message": f"Upstream API error: {e.response.status_code}. Details: {e.response.text}",
+                        "type": "upstream_error", "code": str(e.response.status_code)
                     }
                 }
                 yield f"data: {json.dumps(error_content)}\n\n"
+            finally:
                 yield "data: [DONE]\n\n"
-
         return StreamingResponse(event_stream(), media_type="text/event-stream")
-
-    else: # Non-streaming
+    else: # Non-streaming
+        assistant_response, usage_info = "", {}
+        tool_call_json = None
         try:
             async with httpx.AsyncClient(timeout=120) as client:
-                response = await client.post(
-                    "https://www.chatwithmono.xyz/api/chat",
-                    headers=headers,
-                    json=payload
-                )
-                response.raise_for_status()
-
-                assistant_response = ""
-                usage_info = {}
-                for line in response.text.splitlines():
-                    if line.startswith("0:"):
-                        try:
-                            assistant_response += json.loads(line[2:])
-                        except json.JSONDecodeError:
-                            continue
-                    elif line.startswith(("e:", "d:")):
-                        try:
-                            usage_info = json.loads(line[2:]).get("usage", {})
-                        except json.JSONDecodeError:
-                            continue
+                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                    response.raise_for_status()
+                    async for chunk in response.aiter_lines():
+                        if chunk.startswith("0:"):
+                            try: assistant_response += json.loads(chunk[2:])
+                            except: continue
+                        elif chunk.startswith(("e:", "d:")):
+                            try: usage_info = json.loads(chunk[2:]).get("usage", {})
+                            except: continue

-            tool_calls = None
-            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
-                try:
-                    # Extract tool call content
-                    start_idx = assistant_response.find("<tool_call>") + len("<tool_call>")
-                    end_idx = assistant_response.find("</tool_call>")
-                    tool_call_str = assistant_response[start_idx:end_idx].strip()
-
-                    tool_call_json = json.loads(tool_call_str)
-                    tool_calls = [{
-                        "id": generate_random_id("call_"),
-                        "type": "function",
-                        "function": {
-                            "name": tool_call_json["name"],
-                            "arguments": json.dumps(tool_call_json["parameters"])
-                        }
-                    }]
-                    # Clear content for tool call response
-                    assistant_response = None
-                except (json.JSONDecodeError, KeyError):
-                    # If parsing fails, treat as regular content
-                    tool_calls = None
+            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
+                tool_call_str = assistant_response.split("<tool_call>")[1].split("</tool_call>")[0]
+                tool_call_json = json.loads(tool_call_str.strip())

-            return JSONResponse(content={
-                "id": chat_id,
-                "object": "chat.completion",
-                "created": int(time.time()),
-                "model": model_id,
-                "choices": [{
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": assistant_response,
-                        "tool_calls": tool_calls
-                    },
-                    "finish_reason": "stop"
-                }],
-                "usage": {
-                    "prompt_tokens": usage_info.get("promptTokens", 0),
-                    "completion_tokens": usage_info.get("completionTokens", 0),
-                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
-                }
-            })
-
-        except httpx.HTTPStatusError as e:
-            return JSONResponse(
-                status_code=e.response.status_code,
-                content={
-                    "error": {
-                        "message": f"Upstream API error: {e.response.status_code}",
-                        "type": "upstream_error",
-                        "code": str(e.response.status_code)
-                    }
-                }
-            )
+            return JSONResponse(content={
+                "id": chat_id, "object": "chat.completion", "created": int(time.time()), "model": model_id,
+                "choices": [{"index": 0, "message": {"role": "assistant", "content": assistant_response if tool_call_json is None else None, "tool_calls": tool_call_json}, "finish_reason": "stop"}],
+                "usage": {
+                    "prompt_tokens": usage_info.get("promptTokens", 0),
+                    "completion_tokens": usage_info.get("completionTokens", 0),
+                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
+                }
+            })
+        except httpx.HTTPStatusError as e:
+            return JSONResponse(status_code=e.response.status_code, content={"error": {"message": f"Upstream API error. Details: {e.response.text}", "type": "upstream_error"}})
+

 # === Image Generation ===
 class ImageGenerationRequest(BaseModel):
@@ -420,4 +376,4 @@ async def create_moderation(request: ModerationRequest):
 # --- Main Execution ---
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
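
For reference, both the streaming and non-streaming branches of this commit decode the same upstream line protocol from chatwithmono.xyz: each "0:"-prefixed line carries a JSON-encoded text fragment, and a trailing "e:"/"d:" line carries a JSON object whose "usage" key holds promptTokens/completionTokens. A minimal standalone sketch of that decoding step (the sample lines below are illustrative, not captured upstream output):

import json

def parse_mono_lines(lines):
    """Fold chatwithmono.xyz-style stream lines into (text, usage).

    As in main.py above: a "0:"-prefixed line holds a JSON-encoded text
    fragment; an "e:"/"d:"-prefixed line holds a JSON object whose
    "usage" key carries the token counts.
    """
    text, usage = "", {}
    for line in lines:
        if line.startswith("0:"):
            try:
                text += json.loads(line[2:])  # fragment decodes to a str
            except json.JSONDecodeError:
                continue
        elif line.startswith(("e:", "d:")):
            try:
                usage = json.loads(line[2:]).get("usage", {})
            except json.JSONDecodeError:
                continue
    return text, usage

# Illustrative lines (not captured upstream output):
sample = ['0:"Hello"', '0:" world"', 'e:{"usage":{"promptTokens":5,"completionTokens":2}}']
print(parse_mono_lines(sample))  # ('Hello world', {'promptTokens': 5, 'completionTokens': 2})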
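And a client-side sketch of the prompt-injected tool-calling path this commit switches to. The route decorators for chat_completion and list_models sit outside the hunks shown, so the /v1/chat/completions path and the model id below are assumptions for illustration; the payload fields (messages, tools, stream) match the ChatRequest model that main.py exposes:

import httpx

# Assumptions: the diff does not show chat_completion's route decorator,
# so the /v1/chat/completions path and the model id are hypothetical.
BASE_URL = "http://localhost:8000"  # matches the uvicorn host/port in __main__

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the weather for a city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    },
}]

resp = httpx.post(f"{BASE_URL}/v1/chat/completions", json={
    "model": "some-model-id",  # hypothetical; pick an id from AVAILABLE_MODELS
    "messages": [{"role": "user", "content": "Weather in New York?"}],
    "tools": tools,            # injected into the system prompt by main.py
    "stream": False,
}, timeout=120)
message = resp.json()["choices"][0]["message"]
# When the upstream model emitted <tool_call>...</tool_call>, content is None
# and tool_calls carries the parsed call instead.
print(message.get("tool_calls") or message["content"])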