MixtureOfInputs / app.py
yzhuang's picture
update gradio
f873ce7
raw
history blame
2.38 kB
import json
import os

import gradio as gr
import requests
import sseclient
# Address of the local inference server and the chat-completions route on it;
# `respond` POSTs every chat request to the combined URL.
_API_BASE = "http://localhost:8000"
API_URL = f"{_API_BASE}/v1/chat/completions"
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    beta,
):
    """Stream a chat completion from the server at ``API_URL``.

    This is a generator: it yields the accumulated assistant text after every
    streamed chunk. On any failure it yields a single ``"[ERROR] ..."`` string
    instead of raising, so the message is displayed in the chat window.

    Args:
        message: The new user message.
        history: Earlier turns. Either ``(user, assistant)`` pairs or
            dicts with ``"role"`` and ``"content"`` keys are accepted.
        system_message: Content of the leading system message.
        max_tokens: Sent as ``max_tokens`` in the request payload.
        temperature: Sent as ``temperature`` in the request payload.
        top_p: Sent as ``top_p`` in the request payload.
        beta: Sent as a string in the ``X-MIXINPUTS-BETA`` request header.

    Yields:
        The assistant reply accumulated so far, or an ``"[ERROR] ..."`` string.
    """
    # Build message history
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if isinstance(turn, dict):
            # Role/content dict: forward it as-is when the content is plain
            # text. Unpacking such a dict as a pair would yield its *keys*.
            role = turn.get("role")
            content = turn.get("content")
            if role and content and isinstance(content, str):
                messages.append({"role": role, "content": content})
            continue
        user, assistant = turn
        if user:
            messages.append({"role": "user", "content": user})
        if assistant:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    # Prepare request payload
    payload = {
        "model": "Qwen/Qwen3-4B",  # Update to your actual model if needed
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stream": True,
    }

    # Beta travels as a custom HTTP header, not as a payload field; the server
    # has to be modified to read it.
    headers = {
        "Content-Type": "application/json",
        "X-MIXINPUTS-BETA": str(beta),
    }

    # Stream response using SSE (Server-Sent Events)
    try:
        # Bound only the connect phase: generation may legitimately pause for a
        # long time between chunks, so no read timeout is applied.
        with requests.post(
            API_URL,
            json=payload,
            stream=True,
            headers=headers,
            timeout=(10, None),
        ) as response:
            response.raise_for_status()
            client = sseclient.SSEClient(response)
            full_text = ""
            for event in client.events():
                if event.data == "[DONE]":
                    break
                if not event.data:
                    # Nothing to parse in an empty event.
                    continue
                # SSE events carry the chunk as raw text in `.data`; they have
                # no `.json()` helper, so decode it explicitly.
                chunk = json.loads(event.data)
                choices = chunk.get("choices") or []
                if not choices:
                    continue
                # `content` may be missing or null (e.g. a role-only delta).
                delta = (choices[0].get("delta") or {}).get("content") or ""
                full_text += delta
                yield full_text
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        yield f"[ERROR] {e}"
# Extra controls shown alongside the chat box. Their order matches the
# system_message, max_tokens, temperature, top_p, beta parameters of `respond`.
_EXTRA_CONTROLS = [
    gr.Textbox(
        label="System message",
        value="You are a helpful assistant using Mixture of Inputs.",
    ),
    gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=512),
    gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        minimum=0.1,
        maximum=1.0,
        step=0.05,
        value=0.95,
    ),
    gr.Slider(label="MoI Beta", minimum=0.0, maximum=10.0, step=0.1, value=1.0),
]

# Chat UI wired to the streaming `respond` generator.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=_EXTRA_CONTROLS,
    title="🧪 Mixture of Inputs (MoI) Demo",
    description="Streaming local vLLM demo with dynamic MoI beta adjustment.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()