Update app.py

app.py CHANGED
@@ -48,7 +48,6 @@ async def chat_completion(request: Request):
         data = await request.json()
         messages = data.get("messages", [])
 
-        # Prepare the payload for the Inference API
         payload = {
             "inputs": {
                 "messages": messages
@@ -61,7 +60,6 @@ async def chat_completion(request: Request):
             }
         }
 
-        # Get response from model
         response = await query_model(payload)
 
         if isinstance(response, dict) and "error" in response:
@@ -73,13 +71,7 @@ async def chat_completion(request: Request):
         response_text = response[0]["generated_text"]
 
         return JSONResponse(
-            content=format_chat_response(
-                response_text,
-                # Note: Actual token counts would need to be calculated differently
-                # or obtained from the API response if available
-                prompt_tokens=0,
-                completion_tokens=0
-            )
+            content=format_chat_response(response_text)
         )
     except Exception as e:
         return JSONResponse(
@@ -87,7 +79,6 @@ async def chat_completion(request: Request):
             content={"error": str(e)}
         )
 
-# Synchronous function to generate response for Gradio
 def generate_response(messages):
     payload = {
         "inputs": {
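The helpers `query_model` and `format_chat_response` called above are defined outside the changed hunks, so their bodies don't appear in this diff. A minimal sketch of what the call sites assume; the model id, endpoint URL, HF_TOKEN env var, and response wrapper are assumptions, not code from this commit:

# Hypothetical reconstruction of the helpers referenced above; not part of
# this commit. Model id, endpoint URL, and env var name are assumed.
import os
import httpx

API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct"
HEADERS = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}

async def query_model(payload):
    # The route awaits this call, so an async HTTP client fits here.
    async with httpx.AsyncClient(timeout=60.0) as client:
        resp = await client.post(API_URL, headers=HEADERS, json=payload)
        # Returns either [{"generated_text": ...}] or {"error": ...},
        # matching the isinstance(...) check in the route above.
        return resp.json()

def format_chat_response(response_text):
    # After this commit the helper takes only the text, so it presumably
    # wraps it in an OpenAI-style completion dict without token counts.
    return {"choices": [{"message": {"role": "assistant", "content": response_text}}]}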
@@ -109,31 +100,47 @@ def generate_response(messages):
 
     return result[0]["generated_text"]
 
-
-
-
-    messages = []
-
-    # Convert history to messages format
-    for user_msg, assistant_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": assistant_msg})
+def chat_interface(message, chat_history):
+    if message.strip() == "":
+        return chat_history
 
-    # Add current message
-    messages.append({"role": "user", "content": message})
-
-    # Generate response synchronously
     try:
-
-
+        # Format the message history in the OpenAI style
+        messages = []
+        for msg in chat_history:
+            messages.append({"role": "user", "content": msg[0]})
+            if msg[1] is not None:
+                messages.append({"role": "assistant", "content": msg[1]})
+
+        # Add the current message
+        messages.append({"role": "user", "content": message})
+
+        # Get response
+        response = generate_response(messages)
+
+        # Update history in the new format
+        chat_history.append((message, response))
+        return chat_history
     except Exception as e:
-
+        chat_history.append((message, f"Error: {str(e)}"))
+        return chat_history
 
-interface = gr.ChatInterface(
-
+# Create Gradio interface with new message format
+demo = gr.ChatInterface(
+    fn=chat_interface,
     title="Qwen2.5-Coder-32B Chat",
-    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API"
+    description="Chat with Qwen2.5-Coder-32B model via Hugging Face Inference API",
+    examples=["Hello! Can you help me with coding?",
+              "Write a simple Python function to calculate factorial"],
+    retry_btn="Retry",
+    undo_btn="Undo last message",
+    clear_btn="Clear conversation",
 )
 
 # Mount both FastAPI and Gradio
-app = gr.mount_gradio_app(app, interface, path="/")
+app = gr.mount_gradio_app(app, demo, path="/")
+
+# For running with uvicorn directly
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
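The loop added in `chat_interface` converts Gradio's tuple-based history into the OpenAI-style message list that `generate_response` expects. A worked example with made-up history:

# Worked example of the conversion added above (sample data only).
chat_history = [("Hi", "Hello! How can I help?"), ("What is 2+2?", "4")]
messages = []
for msg in chat_history:
    messages.append({"role": "user", "content": msg[0]})
    if msg[1] is not None:
        messages.append({"role": "assistant", "content": msg[1]})
messages.append({"role": "user", "content": "Now in binary"})
# messages == [
#     {"role": "user", "content": "Hi"},
#     {"role": "assistant", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "What is 2+2?"},
#     {"role": "assistant", "content": "4"},
#     {"role": "user", "content": "Now in binary"},
# ]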
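Because `chat_interface` is a plain synchronous function, it can be smoke-tested without launching the UI. A sketch, assuming app.py imports cleanly and the inference token is configured:

# Hypothetical smoke test for the handler added in this commit; importing
# app also builds the Gradio app and mounts it, which is harmless here.
from app import chat_interface

history = []
history = chat_interface("Write a factorial function in Python", history)
print(history[-1][1])  # the model reply, or "Error: ..." if the API call failed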