api-test / app.py
OjciecTadeusz's picture
Update app.py
c9bc402 verified
raw
history blame
3.36 kB
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
import datetime
import requests
import os
import logging
# Initialize FastAPI
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration
# Hugging Face Inference API endpoint of the model this service forwards to.
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"

# The token is read once, at import time, from the HF_API_TOKEN environment
# variable.
_hf_api_token = os.getenv("HF_API_TOKEN")
if not _hf_api_token:
    # The header below is still built (as "Bearer None" when the variable is
    # unset), so make the misconfiguration visible instead of failing
    # silently on the first upstream call.
    logger.warning(
        "HF_API_TOKEN is not set; requests to the inference API will be sent "
        "without a valid token"
    )

# Headers sent with every request to API_URL.
headers = {
    "Authorization": f"Bearer {_hf_api_token}",
    "Content-Type": "application/json",
}
def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
    """Wrap generated text in an OpenAI-style ``chat.completion`` payload.

    Args:
        response_text: Content of the single assistant message in ``choices``.
        prompt_tokens: Token count reported for the prompt (default 0).
        completion_tokens: Token count reported for the completion (default 0).

    Returns:
        A dict with the keys ``id``, ``object``, ``created``, ``model``,
        ``choices`` and ``usage``; ``usage["total_tokens"]`` is the sum of the
        two token counts.
    """
    import uuid  # local import: only needed to make the id unique

    # Read the clock once so ``id`` and ``created`` describe the same instant.
    now = datetime.datetime.now()
    # A second-resolution timestamp alone repeats for responses built within
    # the same second, so a random suffix keeps every id distinct.
    response_id = (
        f"chatcmpl-{now.strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:12]}"
    )
    return {
        "id": response_id,
        "object": "chat.completion",
        "created": int(now.timestamp()),
        "model": "Qwen/Qwen2.5-Coder-32B",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": response_text,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        },
    }
async def query_model(payload):
    """POST ``payload`` as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serializable request body for the inference endpoint.

    Returns:
        The parsed JSON body of the upstream response.

    Raises:
        HTTPException: with status 500 when the request fails with a
            ``requests`` error (connection problem, timeout, non-2xx status).
    """
    import asyncio
    import functools

    # (connect, read) timeouts in seconds: without them a stalled upstream
    # would keep this request pending forever.
    send = functools.partial(
        requests.post,
        API_URL,
        headers=headers,
        json=payload,
        timeout=(10, 120),
    )
    try:
        # ``requests`` is blocking; running it in the default executor keeps
        # the event loop free to serve other requests in the meantime.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, send)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@app.get("/status")
async def status():
    """Health-check endpoint: reply with a fixed message in chat-completion form."""
    try:
        body = format_chat_response("it's working")
        return JSONResponse(content=body)
    except Exception as exc:
        # Anything that goes wrong while building the reply is logged and
        # reported to the client as a 500.
        logger.error(f"Status check failed: {exc}")
        raise HTTPException(status_code=500, detail=str(exc))
@app.post("/v1/chat/completions")
async def chat_completion(request: Request):
    """Forward a chat request to the model API and return a chat-completion reply.

    The JSON request body must be an object with a non-empty ``messages``
    entry. Optional ``max_tokens``, ``temperature`` and ``top_p`` entries are
    forwarded as generation parameters (defaults: 2048, 0.7, 0.95).

    Returns:
        A ``JSONResponse`` holding the payload built by
        ``format_chat_response`` from the first generated text.

    Raises:
        HTTPException: 400 when the body is not valid JSON, is not a JSON
            object, or has no messages; 500 when the upstream call fails,
            reports an error, or returns an unexpected shape.
    """
    try:
        try:
            data = await request.json()
        except ValueError as e:
            # A malformed body is the client's mistake, not a server failure.
            raise HTTPException(
                status_code=400, detail="Request body must be valid JSON"
            ) from e
        if not isinstance(data, dict):
            raise HTTPException(
                status_code=400, detail="Request body must be a JSON object"
            )

        messages = data.get("messages", [])
        if not messages:
            raise HTTPException(status_code=400, detail="Messages are required")

        payload = {
            "inputs": {
                "messages": messages,
            },
            "parameters": {
                "max_new_tokens": data.get("max_tokens", 2048),
                "temperature": data.get("temperature", 0.7),
                "top_p": data.get("top_p", 0.95),
                "do_sample": True,
            },
        }
        response = await query_model(payload)
        if isinstance(response, dict) and "error" in response:
            raise HTTPException(status_code=500, detail=response["error"])

        try:
            response_text = response[0]["generated_text"]
        except (KeyError, IndexError, TypeError) as e:
            # Give the client a readable message instead of a bare
            # "0" / "'generated_text'" from the underlying lookup error.
            raise HTTPException(
                status_code=500,
                detail="Unexpected response format from model API",
            ) from e
        return JSONResponse(content=format_chat_response(response_text))
    except HTTPException as e:
        logger.error(f"Chat completion failed: {e.detail}")
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def chat_interface(messages, history=None):
    """Placeholder chat callback passed to ``gr.ChatInterface`` by ``gradio_app``.

    Args:
        messages: The incoming user message(s).
        history: Optional prior conversation. Accepted so the function can be
            invoked with two positional arguments; currently unused.
            NOTE(review): Gradio's ChatInterface is documented to call its
            callback as ``fn(message, history)`` -- confirm against the
            installed Gradio version.

    Returns:
        None. No reply is generated yet.
    """
    # TODO: produce an assistant reply; this stub intentionally does nothing.
    return None
# Create Gradio interface
def gradio_app():
    """Build and return a Gradio ``ChatInterface`` around ``chat_interface``.

    The interface is created with ``type="messages"``.
    """
    interface = gr.ChatInterface(chat_interface, type="messages")
    return interface
# Mount both FastAPI and Gradio