AIMaster7 committed
Commit 348c3b8 · verified · Parent: b5ad5b3

Update main.py

Files changed (1)
  1. main.py +128 -172
main.py CHANGED
@@ -4,7 +4,7 @@ import os
 import secrets
 import string
 import time
-from typing import List, Optional, Union, Any, Literal
+from typing import List, Optional, Union, Any
 import httpx
 from dotenv import load_dotenv
 from fastapi import FastAPI
@@ -52,20 +52,9 @@ async def list_models():
     return {"object": "list", "data": AVAILABLE_MODELS}

 # === Chat Completion ===
-class FunctionCall(BaseModel):
-    name: str
-    arguments: str
-
-class ToolCall(BaseModel):
-    id: str
-    type: Literal["function"] = "function"
-    function: FunctionCall
-
 class Message(BaseModel):
     role: str
-    content: Optional[str] = None
-    tool_calls: Optional[List[ToolCall]] = None
-    name: Optional[str] = None
+    content: str

 class ChatRequest(BaseModel):
     messages: List[Message]
@@ -87,222 +76,189 @@ async def chat_completion(request: ChatRequest):
         'referer': 'https://www.chatwithmono.xyz/',
         'user-agent': 'Mozilla/5.0',
     }
-
     if request.tools:
-        tool_prompt = """You have access to tools. To call a tool, respond with JSON within <tool_call><tool_call> XML tags.
-Format: <tool_call>{"name":<name>,"parameters":{...}}</tool_call>"""
-        if request.messages and request.messages[0].role == "system":
+        # Handle tools by injecting them into the system prompt.
+        # Tool calls must be encoded in <tool_call></tool_call> XML tags.
+        tool_prompt = f"""You have access to the following tools. To call a tool, respond with JSON for a tool call within <tool_call></tool_call> XML tags. Respond in the format {{"name": tool name, "parameters": dictionary of argument name and its value}}. Do not use variables.
+Tools:
+{";".join(f"<tool>{tool}</tool>" for tool in request.tools)}
+
+Response format for a tool call:
+For each function call, return a JSON object with the function name and parameters within <tool_call></tool_call> XML tags:
+<tool_call>
+{{"name": <function-name>, "parameters": <args-json-object>}}
+</tool_call>
+
+Example of tool calling:
+<tool_call>
+{{"name": "get_weather", "parameters": {{"city": "New York"}}}}
+</tool_call>
+
+Using tools is recommended.
+"""
+        if request.messages[0].role == "system":
             request.messages[0].content += "\n\n" + tool_prompt
         else:
-            request.messages.insert(0, Message(role="system", content=tool_prompt))
-
+            request.messages.insert(0, {"role": "system", "content": tool_prompt})
     request_data = request.model_dump(exclude_unset=True)
+
     payload = {
         "messages": request_data["messages"],
         "model": model_id
     }
-
     if request.stream:
         async def event_stream():
             created = int(time.time())
             is_first_chunk = True
             usage_info = None
-            tool_call_buffer = ""
-            in_tool_call = False
-
+            is_tool_call = False
+            chunks_buffer = []
+            max_initial_chunks = 4  # Number of initial chunks to buffer
             try:
                 async with httpx.AsyncClient(timeout=120) as client:
-                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat",
-                                             headers=headers, json=payload) as response:
+                    async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
                         response.raise_for_status()
                         async for line in response.aiter_lines():
-                            if not line:
-                                continue
-
+                            if not line: continue
                             if line.startswith("0:"):
                                 try:
                                     content_piece = json.loads(line[2:])
-
-                                    # Check for tool call tags
-                                    if not in_tool_call and "<tool_call>" in content_piece:
-                                        in_tool_call = True
-                                        tool_call_buffer = ""
-
-                                    if in_tool_call:
-                                        tool_call_buffer += content_piece
-                                        if "</tool_call>" in tool_call_buffer:
-                                            # Process complete tool call
-                                            try:
-                                                # Extract tool call content
-                                                start_idx = tool_call_buffer.find("<tool_call>") + len("<tool_call>")
-                                                end_idx = tool_call_buffer.find("</tool_call>")
-                                                tool_call_str = tool_call_buffer[start_idx:end_idx].strip()
-
-                                                tool_call_json = json.loads(tool_call_str)
-                                                delta = {
-                                                    "content": None,
-                                                    "tool_calls": [{
-                                                        "index": 0,
-                                                        "id": generate_random_id("call_"),
-                                                        "type": "function",
-                                                        "function": {
-                                                            "name": tool_call_json["name"],
-                                                            "arguments": json.dumps(tool_call_json["parameters"])
-                                                        }
-                                                    }]
-                                                }
-                                                chunk_data = {
-                                                    "id": chat_id,
-                                                    "object": "chat.completion.chunk",
-                                                    "created": created,
-                                                    "model": model_id,
-                                                    "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
-                                                    "usage": None
-                                                }
-                                                yield f"data: {json.dumps(chunk_data)}\n\n"
-                                                in_tool_call = False
-                                                tool_call_buffer = ""
-                                            except (json.JSONDecodeError, KeyError):
-                                                # Fallback to regular content if parsing fails
-                                                in_tool_call = False
-                                                tool_call_buffer = ""
+                                    print(content_piece)
+                                    # Buffer the first few chunks
+                                    if len(chunks_buffer) < max_initial_chunks:
+                                        chunks_buffer.append(content_piece)
+                                        continue
+                                    # Process the buffered chunks if we haven't already
+                                    if chunks_buffer and not is_tool_call:
+                                        full_buffer = ''.join(chunks_buffer)
+                                        if "<tool_call>" in full_buffer:
+                                            print("Tool call detected")
+                                            is_tool_call = True
+
+                                    # Process the current chunk
+                                    if is_tool_call:
+                                        chunks_buffer.append(content_piece)
+
+                                        full_buffer = ''.join(chunks_buffer)
+
+                                        if "</tool_call>" in full_buffer:
+                                            print("Tool call End detected")
+                                            # Process the tool call in the current chunk
+                                            tool_call_str = full_buffer.split("<tool_call>")[1].split("</tool_call>")[0]
+                                            tool_call_json = json.loads(tool_call_str.strip())
+                                            delta = {
+                                                "content": None,
+                                                "tool_calls": [{
+                                                    "index": 0,
+                                                    "id": generate_random_id("call_"),
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": tool_call_json["name"],
+                                                        "arguments": json.dumps(tool_call_json["parameters"])
+                                                    }
+                                                }]
+                                            }
+                                            chunk_data = {
+                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                                "model": model_id,
+                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                "usage": None
+                                            }
+                                            yield f"data: {json.dumps(chunk_data)}\n\n"
                                         else:
-                                            # Still building tool call - skip sending this chunk
                                             continue
                                     else:
+
                                         # Regular content
-                                        delta = {"content": content_piece}
                                         if is_first_chunk:
+                                            delta = {"content": "".join(chunks_buffer), "tool_calls": None}
                                             delta["role"] = "assistant"
                                             is_first_chunk = False
+                                            chunk_data = {
+                                                "id": chat_id, "object": "chat.completion.chunk", "created": created,
+                                                "model": model_id,
+                                                "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
+                                                "usage": None
+                                            }
+                                            yield f"data: {json.dumps(chunk_data)}\n\n"
+
+                                        delta = {"content": content_piece, "tool_calls": None}
+
                                         chunk_data = {
-                                            "id": chat_id,
-                                            "object": "chat.completion.chunk",
-                                            "created": created,
+                                            "id": chat_id, "object": "chat.completion.chunk", "created": created,
                                             "model": model_id,
                                             "choices": [{"index": 0, "delta": delta, "finish_reason": None}],
                                             "usage": None
                                         }
                                         yield f"data: {json.dumps(chunk_data)}\n\n"
-
-                                except json.JSONDecodeError:
-                                    continue
-
+                                except json.JSONDecodeError: continue
                             elif line.startswith(("e:", "d:")):
                                 try:
                                     usage_info = json.loads(line[2:]).get("usage")
-                                except (json.JSONDecodeError, AttributeError):
-                                    pass
+                                except (json.JSONDecodeError, AttributeError): pass
                                 break
-
-                # Final chunk
+
+                final_usage = None
+                if usage_info:
+                    prompt_tokens = usage_info.get("promptTokens", 0)
+                    completion_tokens = usage_info.get("completionTokens", 0)
+                    final_usage = {
+                        "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens,
+                        "total_tokens": prompt_tokens + completion_tokens,
+                    }
                 done_chunk = {
-                    "id": chat_id,
-                    "object": "chat.completion.chunk",
-                    "created": created,
-                    "model": model_id,
+                    "id": chat_id, "object": "chat.completion.chunk", "created": created, "model": model_id,
                     "choices": [{
                         "index": 0,
-                        "delta": {},
+                        "delta": {"role": "assistant", "content": None, "function_call": None, "tool_calls": None},
                         "finish_reason": "stop"
                     }],
-                    "usage": usage_info
+                    "usage": final_usage
                 }
                 yield f"data: {json.dumps(done_chunk)}\n\n"
-                yield "data: [DONE]\n\n"
-
             except httpx.HTTPStatusError as e:
                 error_content = {
                     "error": {
-                        "message": f"Upstream API error: {e.response.status_code}",
-                        "type": "upstream_error",
-                        "code": str(e.response.status_code)
+                        "message": f"Upstream API error: {e.response.status_code}. Details: {e.response.text}",
+                        "type": "upstream_error", "code": str(e.response.status_code)
                     }
                 }
                 yield f"data: {json.dumps(error_content)}\n\n"
+            finally:
                 yield "data: [DONE]\n\n"
-
         return StreamingResponse(event_stream(), media_type="text/event-stream")
-
-    else: # Non-streaming
+    else: # Non-streaming
+        assistant_response, usage_info = "", {}
+        tool_call_json = None
         try:
             async with httpx.AsyncClient(timeout=120) as client:
-                response = await client.post(
-                    "https://www.chatwithmono.xyz/api/chat",
-                    headers=headers,
-                    json=payload
-                )
-                response.raise_for_status()
-
-                assistant_response = ""
-                usage_info = {}
-                for line in response.text.splitlines():
-                    if line.startswith("0:"):
-                        try:
-                            assistant_response += json.loads(line[2:])
-                        except json.JSONDecodeError:
-                            continue
-                    elif line.startswith(("e:", "d:")):
-                        try:
-                            usage_info = json.loads(line[2:]).get("usage", {})
-                        except json.JSONDecodeError:
-                            continue
+                async with client.stream("POST", "https://www.chatwithmono.xyz/api/chat", headers=headers, json=payload) as response:
+                    response.raise_for_status()
+                    async for chunk in response.aiter_lines():
+                        if chunk.startswith("0:"):
+                            try: assistant_response += json.loads(chunk[2:])
+                            except: continue
+                        elif chunk.startswith(("e:", "d:")):
+                            try: usage_info = json.loads(chunk[2:]).get("usage", {})
+                            except: continue

-            tool_calls = None
-            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
-                try:
-                    # Extract tool call content
-                    start_idx = assistant_response.find("<tool_call>") + len("<tool_call>")
-                    end_idx = assistant_response.find("</tool_call>")
-                    tool_call_str = assistant_response[start_idx:end_idx].strip()
-
-                    tool_call_json = json.loads(tool_call_str)
-                    tool_calls = [{
-                        "id": generate_random_id("call_"),
-                        "type": "function",
-                        "function": {
-                            "name": tool_call_json["name"],
-                            "arguments": json.dumps(tool_call_json["parameters"])
-                        }
-                    }]
-                    # Clear content for tool call response
-                    assistant_response = None
-                except (json.JSONDecodeError, KeyError):
-                    # If parsing fails, treat as regular content
-                    tool_calls = None
+            if "<tool_call>" in assistant_response and "</tool_call>" in assistant_response:
+                tool_call_str = assistant_response.split("<tool_call>")[1].split("</tool_call>")[0]
+                tool_call_json = json.loads(tool_call_str.strip())

-            return JSONResponse(content={
-                "id": chat_id,
-                "object": "chat.completion",
-                "created": int(time.time()),
-                "model": model_id,
-                "choices": [{
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": assistant_response,
-                        "tool_calls": tool_calls
-                    },
-                    "finish_reason": "stop"
-                }],
-                "usage": {
-                    "prompt_tokens": usage_info.get("promptTokens", 0),
-                    "completion_tokens": usage_info.get("completionTokens", 0),
-                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
-                }
-            })
-
-        except httpx.HTTPStatusError as e:
-            return JSONResponse(
-                status_code=e.response.status_code,
-                content={
-                    "error": {
-                        "message": f"Upstream API error: {e.response.status_code}",
-                        "type": "upstream_error",
-                        "code": str(e.response.status_code)
-                    }
-                }
-            )
+            return JSONResponse(content={
+                "id": chat_id, "object": "chat.completion", "created": int(time.time()), "model": model_id,
+                "choices": [{"index": 0, "message": {"role": "assistant", "content": assistant_response if tool_call_json is None else None, "tool_calls": tool_call_json}, "finish_reason": "stop"}],
+                "usage": {
+                    "prompt_tokens": usage_info.get("promptTokens", 0),
+                    "completion_tokens": usage_info.get("completionTokens", 0),
+                    "total_tokens": usage_info.get("promptTokens", 0) + usage_info.get("completionTokens", 0),
+                }
+            })
+        except httpx.HTTPStatusError as e:
+            return JSONResponse(status_code=e.response.status_code, content={"error": {"message": f"Upstream API error. Details: {e.response.text}", "type": "upstream_error"}})
+

 # === Image Generation ===
 class ImageGenerationRequest(BaseModel):
@@ -420,4 +376,4 @@ async def create_moderation(request: ModerationRequest):
 # --- Main Execution ---
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
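
For reference, both the streaming and non-streaming branches of this commit decode the same upstream line protocol from chatwithmono.xyz: each "0:"-prefixed line carries a JSON-encoded text fragment, and a trailing "e:"/"d:" line carries a JSON object whose "usage" key holds promptTokens/completionTokens. A minimal standalone sketch of that decoding step (the sample lines below are illustrative, not captured upstream output):

import json

def parse_mono_lines(lines):
    """Fold chatwithmono.xyz-style stream lines into (text, usage).

    As in main.py above: a "0:"-prefixed line holds a JSON-encoded text
    fragment; an "e:"/"d:"-prefixed line holds a JSON object whose
    "usage" key carries the token counts.
    """
    text, usage = "", {}
    for line in lines:
        if line.startswith("0:"):
            try:
                text += json.loads(line[2:])  # fragment decodes to a str
            except json.JSONDecodeError:
                continue
        elif line.startswith(("e:", "d:")):
            try:
                usage = json.loads(line[2:]).get("usage", {})
            except json.JSONDecodeError:
                continue
    return text, usage

# Illustrative lines (not captured upstream output):
sample = ['0:"Hello"', '0:" world"', 'e:{"usage":{"promptTokens":5,"completionTokens":2}}']
print(parse_mono_lines(sample))  # ('Hello world', {'promptTokens': 5, 'completionTokens': 2})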
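And a client-side sketch of the prompt-injected tool-calling path this commit switches to. The route decorators for chat_completion and list_models sit outside the hunks shown, so the /v1/chat/completions path and the model id below are assumptions for illustration; the payload fields (messages, tools, stream) match the ChatRequest model that main.py exposes:

import httpx

# Assumptions: the diff does not show chat_completion's route decorator,
# so the /v1/chat/completions path and the model id are hypothetical.
BASE_URL = "http://localhost:8000"  # matches the uvicorn host/port in __main__

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the weather for a city",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    },
}]

resp = httpx.post(f"{BASE_URL}/v1/chat/completions", json={
    "model": "some-model-id",  # hypothetical; pick an id from AVAILABLE_MODELS
    "messages": [{"role": "user", "content": "Weather in New York?"}],
    "tools": tools,            # injected into the system prompt by main.py
    "stream": False,
}, timeout=120)
message = resp.json()["choices"][0]["message"]
# When the upstream model emitted <tool_call>...</tool_call>, content is None
# and tool_calls carries the parsed call instead.
print(message.get("tool_calls") or message["content"])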