Spaces:
Runtime error
Runtime error
File size: 4,682 Bytes
37e4010 922765a 37e4010 d6b0a9b 922765a cce0194 37e4010 cce0194 922765a d6b0a9b 404e508 d6b0a9b 37e4010 d6b0a9b 37e4010 cce0194 37e4010 cce0194 d6b0a9b 922765a d6b0a9b 4b77577 922765a 4b77577 922765a 37e4010 cce0194 37e4010 922765a d6b0a9b 404e508 d6b0a9b 37e4010 d6b0a9b 922765a 404e508 d6b0a9b 37e4010 922765a cce0194 922765a cce0194 1fb73a8 d6b0a9b 922765a 1fb73a8 d98612c 8cdb111 d98612c 8cdb111 d98612c 37e4010 dc3ffec ceeb878 102225f 404e508 37e4010 ceeb878 97b4be5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import asyncio
import datetime
import functools
import logging
import os

import gradio as gr
import requests
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
# Initialize FastAPI
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"

# Headers sent with every Inference API request. The Authorization header is
# added only when HF_API_TOKEN is set: interpolating a missing token into the
# f-string would send the literal value "Bearer None".
headers = {"Content-Type": "application/json"}
_hf_api_token = os.getenv("HF_API_TOKEN")
if _hf_api_token:
    headers["Authorization"] = f"Bearer {_hf_api_token}"
else:
    logger.warning("HF_API_TOKEN is not set; requests will be sent without credentials")
def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
    """Wrap generated text in an OpenAI-style ``chat.completion`` payload.

    Args:
        response_text: Content of the single assistant message.
        prompt_tokens: Prompt token count reported under ``usage``.
        completion_tokens: Completion token count reported under ``usage``.

    Returns:
        A dict with ``id``, ``object``, ``created``, ``model``, ``choices``
        and ``usage`` keys; ``usage["total_tokens"]`` is the sum of the two
        token counts.
    """
    # Read the clock once so ``id`` and ``created`` always describe the same
    # instant; two separate reads can straddle a second boundary.
    now = datetime.datetime.now()
    return {
        "id": f"chatcmpl-{now.strftime('%Y%m%d%H%M%S')}",
        "object": "chat.completion",
        "created": int(now.timestamp()),
        "model": "Qwen/Qwen2.5-Coder-32B",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": response_text,
            },
            "finish_reason": "stop",
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
async def query_model(payload, timeout=120):
    """POST *payload* to ``API_URL`` and return the decoded JSON response.

    ``requests`` is a blocking client, so the call is handed to the event
    loop's default executor; issuing it directly inside this coroutine would
    stall every other request handled by the loop while waiting on the
    upstream API.

    Args:
        payload: Request body, sent as JSON.
        timeout: Seconds to wait for the upstream API before giving up.
            Without a timeout a hung connection would block indefinitely.

    Returns:
        The decoded JSON body of the upstream response.

    Raises:
        HTTPException: Status 500 when the request fails, times out, or the
            upstream API answers with an error status.
    """
    send = functools.partial(
        requests.post, API_URL, headers=headers, json=payload, timeout=timeout
    )
    try:
        response = await asyncio.get_running_loop().run_in_executor(None, send)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/status")
async def status():
    """Health-check endpoint that answers with a fixed chat-completion payload."""
    try:
        body = format_chat_response("it's working")
        return JSONResponse(content=body)
    except Exception as exc:
        # Report any failure while building the payload as a 500.
        logger.error(f"Status check failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    """OpenAI-style chat completion endpoint backed by the Inference API.

    Reads ``messages`` plus the optional ``max_tokens``, ``temperature`` and
    ``top_p`` fields from the JSON request body, forwards them through
    ``query_model`` and wraps the generated text with
    ``format_chat_response``.

    Raises:
        HTTPException: 400 when the body is not a JSON object or carries no
            messages; 500 when the upstream call fails or returns a payload
            of an unexpected shape.
    """
    try:
        try:
            data = await request.json()
        except ValueError:
            # A malformed body is the client's fault, so answer 400, not 500.
            raise HTTPException(
                status_code=400, detail="Request body must be valid JSON"
            ) from None
        if not isinstance(data, dict):
            raise HTTPException(
                status_code=400, detail="Request body must be a JSON object"
            )

        messages = data.get("messages", [])
        if not messages:
            raise HTTPException(status_code=400, detail="Messages are required")

        # NOTE(review): the payload nests the chat messages under "inputs";
        # confirm the target endpoint accepts this shape for this model.
        payload = {
            "inputs": {
                "messages": messages
            },
            "parameters": {
                "max_new_tokens": data.get("max_tokens", 2048),
                "temperature": data.get("temperature", 0.7),
                "top_p": data.get("top_p", 0.95),
                "do_sample": True
            }
        }
        response = await query_model(payload)
        if isinstance(response, dict) and "error" in response:
            raise HTTPException(status_code=500, detail=response["error"])

        try:
            response_text = response[0]["generated_text"]
        except (KeyError, IndexError, TypeError):
            # Give a readable message instead of a bare "0" / "'generated_text'".
            raise HTTPException(
                status_code=500,
                detail="Unexpected response format from model API",
            ) from None
        return JSONResponse(content=format_chat_response(response_text))
    except HTTPException as e:
        logger.error(f"Chat completion failed: {e.detail}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def generate_response(messages, timeout=120):
    """Send *messages* to ``API_URL`` and return the generated text.

    Failures are reported as an ``"Error: ..."`` string rather than raised,
    so the caller can show them directly in the chat.

    Args:
        messages: List of ``{"role": ..., "content": ...}`` dicts.
        timeout: Seconds to wait for the upstream API before giving up.
            Without a timeout a hung connection would block indefinitely.

    Returns:
        The ``generated_text`` of the first result, or an ``"Error: ..."``
        string when the request fails, the API reports an error, or the
        response does not have the expected shape.
    """
    # NOTE(review): the payload nests the chat messages under "inputs";
    # confirm the target endpoint accepts this shape for this model.
    payload = {
        "inputs": {
            "messages": messages
        },
        "parameters": {
            "max_new_tokens": 2048,
            "temperature": 0.7,
            "top_p": 0.95,
            "do_sample": True
        }
    }
    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        logger.error(f"Request failed: {e}")
        return f"Error: {e}"

    if isinstance(result, dict) and "error" in result:
        return f"Error: {result['error']}"
    try:
        return result[0]["generated_text"]
    except (KeyError, IndexError, TypeError):
        logger.error("Unexpected response format: %r", result)
        return "Error: unexpected response format from model API"
def chat_interface(messages, history=None):
    """Chat callback used by the Gradio ``ChatInterface``.

    ``gr.ChatInterface`` invokes its callback as ``fn(message, history)``.
    The previous single-argument form could not be called that way, and
    iterating a ``str`` message would have sent each character as its own
    request.

    Args:
        messages: Either the latest user message as a ``str`` (the Gradio
            call), or an iterable of user messages (the earlier call form).
        history: Previous turns as ``{"role": ..., "content": ...}`` dicts;
            only consulted when *messages* is a ``str``.

    Returns:
        For a ``str`` message: the assistant reply as a string. For an
        iterable: a list of alternating user/assistant message dicts, one
        pair per input message. Errors are returned as ``"Error: ..."`` text
        in place of the reply.
    """
    if isinstance(messages, str):
        try:
            # Keep only role/content; history entries may carry extra keys.
            conversation = [
                {"role": turn["role"], "content": turn["content"]}
                for turn in (history or [])
            ]
            conversation.append({"role": "user", "content": messages})
            return generate_response(conversation)
        except Exception as e:
            return f"Error: {str(e)}"

    chat_history = []
    for message in messages:
        try:
            response = generate_response([{"role": "user", "content": message}])
        except Exception as e:
            response = f"Error: {str(e)}"
        chat_history.append({"role": "user", "content": message})
        chat_history.append({"role": "assistant", "content": response})
    return chat_history
# Create Gradio interface
def gradio_app():
    """Build the Gradio chat UI whose callback is ``chat_interface``."""
    interface = gr.ChatInterface(chat_interface, type="messages")
    return interface
# Mount both FastAPI and Gradio: the Gradio UI is attached at the root path of
# the existing FastAPI app, and the combined app replaces ``app``.
app = gr.mount_gradio_app(app, gradio_app(), path="/")

# For running with uvicorn directly
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)