OjciecTadeusz committed on
Commit
922765a
·
verified ·
1 Parent(s): 4b77577

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -34
app.py CHANGED
@@ -1,15 +1,18 @@
1
  import gradio as gr
2
- from fastapi import FastAPI, Request
3
  from fastapi.responses import JSONResponse
4
  import datetime
5
  import requests
6
  import os
7
- import json
8
- import asyncio
9
 
10
  # Initialize FastAPI
11
  app = FastAPI()
12
 
 
 
 
 
13
  # Configuration
14
  API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"
15
  headers = {
@@ -39,29 +42,31 @@ def format_chat_response(response_text, prompt_tokens=0, completion_tokens=0):
39
  }
40
 
41
  async def query_model(payload):
42
- response = requests.post(API_URL, headers=headers, json=payload)
43
- return response.json()
 
 
 
 
 
44
 
45
  @app.get("/status")
46
  async def status():
47
  try:
48
  response_text = "it's working"
49
-
50
- return JSONResponse(
51
- content=format_chat_response(response_text)
52
- )
53
  except Exception as e:
54
- return JSONResponse(
55
- status_code=500,
56
- content={"error": str(e)}
57
- )
58
-
59
  @app.post("/v1/chat/completions")
60
  async def chat_completion(request: Request):
61
  try:
62
  data = await request.json()
63
  messages = data.get("messages", [])
64
-
 
 
65
  payload = {
66
  "inputs": {
67
  "messages": messages
@@ -77,21 +82,17 @@ async def chat_completion(request: Request):
77
  response = await query_model(payload)
78
 
79
  if isinstance(response, dict) and "error" in response:
80
- return JSONResponse(
81
- status_code=500,
82
- content={"error": response["error"]}
83
- )
84
 
85
  response_text = response[0]["generated_text"]
86
 
87
- return JSONResponse(
88
- content=format_chat_response(response_text)
89
- )
 
90
  except Exception as e:
91
- return JSONResponse(
92
- status_code=500,
93
- content={"error": str(e)}
94
- )
95
 
96
  def generate_response(messages):
97
  payload = {
@@ -106,13 +107,18 @@ def generate_response(messages):
106
  }
107
  }
108
 
109
- response = requests.post(API_URL, headers=headers, json=payload)
110
- result = response.json()
111
-
112
- if isinstance(result, dict) and "error" in result:
113
- return f"Error: {result['error']}"
114
-
115
- return result[0]["generated_text"]
 
 
 
 
 
116
 
117
  def chat_interface(messages):
118
  chat_history = []
@@ -128,7 +134,6 @@ def chat_interface(messages):
128
 
129
  # Create Gradio interface
130
  def gradio_app():
131
- #return gr.chat_interface(gr.Chatbot(placeholder="placeholder"), type="messages", value=[])
132
  return gr.ChatInterface(chat_interface, type="messages")
133
 
134
  # Mount both FastAPI and Gradio
 
1
  import gradio as gr
2
+ from fastapi import FastAPI, Request, HTTPException
3
  from fastapi.responses import JSONResponse
4
  import datetime
5
  import requests
6
  import os
7
+ import logging
 
8
 
9
  # Initialize FastAPI
10
  app = FastAPI()
11
 
12
+ # Configure logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
+
16
  # Configuration
17
  API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B"
18
  headers = {
 
42
  }
43
 
44
async def query_model(payload):
    """POST *payload* to the HF Inference API and return the decoded JSON.

    Fixes two defects in the original:
    - ``requests.post`` is synchronous; calling it directly inside an
      ``async def`` blocks the whole event loop. It is now offloaded to a
      worker thread via ``asyncio.to_thread`` (awaitable, so the caller's
      ``await query_model(...)`` contract is unchanged).
    - the request had no timeout, so a stalled upstream could hang the
      endpoint forever; a 60s timeout is added (raises
      ``requests.exceptions.Timeout``, a ``RequestException`` subclass,
      so it is handled by the existing except clause).

    Raises:
        HTTPException: 500 with the transport/HTTP error as detail.
    """
    import asyncio  # local import so this fix is self-contained

    try:
        response = await asyncio.to_thread(
            requests.post, API_URL, headers=headers, json=payload, timeout=60
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.error(f"Request failed: {e}")
        # chain the cause so the original traceback is preserved
        raise HTTPException(status_code=500, detail=str(e)) from e
52
 
53
@app.get("/status")
async def status():
    """Health-check endpoint.

    Returns a chat-formatted JSON payload confirming the service is up;
    any unexpected failure is logged and surfaced as a 500.
    """
    try:
        # Inline the fixed health message straight into the response.
        return JSONResponse(content=format_chat_response("it's working"))
    except Exception as e:
        logger.error(f"Status check failed: {e}")
        raise HTTPException(status_code=500, detail=str(e))
61
+
 
 
62
  @app.post("/v1/chat/completions")
63
  async def chat_completion(request: Request):
64
  try:
65
  data = await request.json()
66
  messages = data.get("messages", [])
67
+ if not messages:
68
+ raise HTTPException(status_code=400, detail="Messages are required")
69
+
70
  payload = {
71
  "inputs": {
72
  "messages": messages
 
82
  response = await query_model(payload)
83
 
84
  if isinstance(response, dict) and "error" in response:
85
+ raise HTTPException(status_code=500, detail=response["error"])
 
 
 
86
 
87
  response_text = response[0]["generated_text"]
88
 
89
+ return JSONResponse(content=format_chat_response(response_text))
90
+ except HTTPException as e:
91
+ logger.error(f"Chat completion failed: {e.detail}")
92
+ raise e
93
  except Exception as e:
94
+ logger.error(f"Unexpected error: {e}")
95
+ raise HTTPException(status_code=500, detail=str(e))
 
 
96
 
97
  def generate_response(messages):
98
  payload = {
 
107
  }
108
  }
109
 
110
+ try:
111
+ response = requests.post(API_URL, headers=headers, json=payload)
112
+ response.raise_for_status()
113
+ result = response.json()
114
+
115
+ if isinstance(result, dict) and "error" in result:
116
+ return f"Error: {result['error']}"
117
+
118
+ return result[0]["generated_text"]
119
+ except requests.exceptions.RequestException as e:
120
+ logger.error(f"Request failed: {e}")
121
+ return f"Error: {e}"
122
 
123
  def chat_interface(messages):
124
  chat_history = []
 
134
 
135
  # Create Gradio interface
136
def gradio_app():
    """Build and return the Gradio chat UI driven by ``chat_interface``."""
    ui = gr.ChatInterface(chat_interface, type="messages")
    return ui
138
 
139
  # Mount both FastAPI and Gradio