api-test

Runtime error

App Files Files Community

api-test / app.py

OjciecTadeusz

Update app.py

97b4be5 verified 8 months ago

raw

history blame

4.36 kB

	import gradio as gr
	from fastapi import FastAPI, Request
	from fastapi.responses import JSONResponse
	import datetime
	import requests
	import os
	import json
	import asyncio

	# FastAPI application that carries the OpenAI-style REST endpoint
	app = FastAPI()

	# Inference endpoint and the headers sent with every request to it
	API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"
	headers = {}
	# os.getenv returns None when HF_API_TOKEN is unset, in which case the
	# header value becomes the literal string "Bearer None".
	headers["Authorization"] = f"Bearer {os.getenv('HF_API_TOKEN')}"
	headers["Content-Type"] = "application/json"

	def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
	    """Wrap generated text in an OpenAI-style ``chat.completion`` payload.

	    Args:
	        response_text: Text placed in the single assistant message.
	        prompt_tokens: Prompt token count reported under ``usage``.
	        completion_tokens: Completion token count reported under ``usage``.

	    Returns:
	        A dict with the keys ``id``, ``object``, ``created``, ``model``,
	        ``choices`` and ``usage``; ``total_tokens`` is the sum of the two
	        counts passed in.
	    """
	    # The id embeds the local wall-clock time at one-second resolution.
	    completion_id = f"chatcmpl-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}"
	    created_at = int(datetime.datetime.now().timestamp())

	    assistant_message = {"role": "assistant", "content": response_text}
	    only_choice = {
	        "index": 0,
	        "message": assistant_message,
	        "finish_reason": "stop",
	    }
	    token_usage = {
	        "prompt_tokens": prompt_tokens,
	        "completion_tokens": completion_tokens,
	        "total_tokens": prompt_tokens + completion_tokens,
	    }

	    return {
	        "id": completion_id,
	        "object": "chat.completion",
	        "created": created_at,
	        "model": "Qwen/Qwen2.5-Coder-32B",
	        "choices": [only_choice],
	        "usage": token_usage,
	    }

	async def query_model(payload, timeout=120):
	    """POST ``payload`` to ``API_URL`` and return the decoded JSON reply.

	    The blocking ``requests`` call is run in the event loop's default
	    executor so the loop is not stalled while waiting for the upstream API.

	    Args:
	        payload: JSON-serialisable request body.
	        timeout: Seconds ``requests`` waits before raising a timeout error.
	            The default is a judgement call; without any timeout a stalled
	            upstream would make the call wait indefinitely.

	    Returns:
	        Whatever JSON value the upstream API responded with.
	    """
	    def _post():
	        return requests.post(API_URL, headers=headers, json=payload, timeout=timeout)

	    loop = asyncio.get_running_loop()
	    response = await loop.run_in_executor(None, _post)
	    return response.json()

	@app.post("/v1/chat/completions")
	async def chat_completion(request: Request):
	    """Serve an OpenAI-style chat completion backed by the inference API.

	    Reads ``messages`` plus the optional ``max_tokens``, ``temperature`` and
	    ``top_p`` fields from the JSON body, forwards them through
	    ``query_model`` and wraps the generated text with
	    ``format_chat_response``.

	    Returns:
	        A ``JSONResponse``: the completion on success, status 400 with an
	        ``error`` field when the body is not a JSON object, or status 500
	        with an ``error`` field when the upstream call fails or answers in
	        an unexpected shape.
	    """
	    # A malformed body is the client's mistake, so answer 400 rather than 500.
	    try:
	        data = await request.json()
	    except ValueError:
	        return JSONResponse(
	            status_code=400,
	            content={"error": "Request body must be valid JSON"}
	        )
	    except Exception as e:
	        return JSONResponse(
	            status_code=500,
	            content={"error": str(e)}
	        )

	    if not isinstance(data, dict):
	        return JSONResponse(
	            status_code=400,
	            content={"error": "Request body must be a JSON object"}
	        )

	    messages = data.get("messages", [])

	    # NOTE(review): the messages are nested under "inputs"; confirm this is
	    # the request shape the target endpoint accepts.
	    payload = {
	        "inputs": {
	            "messages": messages
	        },
	        "parameters": {
	            "max_new_tokens": data.get("max_tokens", 2048),
	            "temperature": data.get("temperature", 0.7),
	            "top_p": data.get("top_p", 0.95),
	            "do_sample": True
	        }
	    }

	    try:
	        response = await query_model(payload)
	    except Exception as e:
	        return JSONResponse(
	            status_code=500,
	            content={"error": str(e)}
	        )

	    if isinstance(response, dict) and "error" in response:
	        return JSONResponse(
	            status_code=500,
	            content={"error": response["error"]}
	        )

	    # Report an unexpected reply shape explicitly instead of leaking the
	    # text of a KeyError/IndexError (e.g. "0") to the client.
	    try:
	        response_text = response[0]["generated_text"]
	    except (KeyError, IndexError, TypeError):
	        return JSONResponse(
	            status_code=500,
	            content={"error": "Unexpected response format from model API"}
	        )

	    return JSONResponse(
	        content=format_chat_response(response_text)
	    )

	def generate_response(messages, timeout=120):
	    """Send ``messages`` to the inference API and return the generated text.

	    Args:
	        messages: List of ``{"role": ..., "content": ...}`` dicts, sent
	            nested under ``inputs`` with fixed sampling parameters.
	        timeout: Seconds ``requests`` waits before raising a timeout error.
	            The default is a judgement call; without any timeout a stalled
	            upstream would block the caller indefinitely.

	    Returns:
	        The ``generated_text`` of the first result, or a string starting
	        with ``"Error: "`` when the API replies with an ``error`` field.

	    Raises:
	        KeyError, IndexError, TypeError: If the reply is neither an error
	            dict nor a sequence whose first item has ``generated_text``.
	    """
	    payload = {
	        "inputs": {
	            "messages": messages
	        },
	        "parameters": {
	            "max_new_tokens": 2048,
	            "temperature": 0.7,
	            "top_p": 0.95,
	            "do_sample": True
	        }
	    }

	    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	    result = response.json()

	    if isinstance(result, dict) and "error" in result:
	        return f"Error: {result['error']}"

	    return result[0]["generated_text"]

	def chat_interface(message, chat_history):
	    """Append one user/assistant exchange to ``chat_history`` and return it.

	    A blank ``message`` leaves the history untouched. Otherwise the prior
	    turns plus the new message are converted to role/content dicts, passed
	    to ``generate_response``, and ``(message, reply)`` is appended in place.
	    Any exception raised while doing so is reported as the reply text
	    ``"Error: ..."`` instead of propagating.

	    Args:
	        message: The new user message.
	        chat_history: Sequence of turns indexed as ``turn[0]`` (user text)
	            and ``turn[1]`` (assistant text, may be ``None``).

	    Returns:
	        The same ``chat_history`` object that was passed in.
	    """
	    if not message.strip():
	        return chat_history

	    try:
	        # Rebuild the conversation as OpenAI-style role/content entries
	        conversation = []
	        for turn in chat_history:
	            conversation.append({"role": "user", "content": turn[0]})
	            if turn[1] is None:
	                continue
	            conversation.append({"role": "assistant", "content": turn[1]})

	        # The message being answered goes last
	        conversation.append({"role": "user", "content": message})

	        reply = generate_response(conversation)
	    except Exception as e:
	        reply = f"Error: {str(e)}"

	    chat_history.append((message, reply))
	    return chat_history

	# Create the Gradio chat UI.
	def _chat_reply(message, history):
	    """Adapt ``chat_interface`` to the callback contract of ``gr.ChatInterface``.

	    ``gr.ChatInterface`` expects ``fn`` to return only the assistant's reply,
	    whereas ``chat_interface`` returns the whole updated history. This
	    wrapper calls it on a copy of the history (so the state passed in is
	    not mutated) and hands back just the newest reply.
	    """
	    previous = list(history or [])
	    updated = chat_interface(message, list(previous))
	    # chat_interface adds no turn for a blank message; reply with nothing then.
	    if len(updated) > len(previous):
	        return updated[-1][1]
	    return ""

	# NOTE(review): retry_btn / undo_btn / clear_btn were removed from
	# gr.ChatInterface in Gradio 5 — confirm the gradio version this runs on.
	demo = gr.ChatInterface(
	    fn=_chat_reply,
	    title="Qwen2.5-Coder-32B Chat",
	    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API",
	    examples=["Hello! Can you help me with coding?",
	              "Write a simple Python function to calculate factorial"],
	    retry_btn="Retry",
	    undo_btn="Undo last message",
	    clear_btn="Clear conversation",
	)

	# Serve the Gradio UI from the root path of the same FastAPI application
	app = gr.mount_gradio_app(app, demo, path="/")

	# Entry point when the file is executed as a script rather than imported
	if __name__ == "__main__":
	    from uvicorn import run as serve

	    # Listen on every interface, port 7860
	    serve(app, host="0.0.0.0", port=7860)