import gradio as gr
from openai import OpenAI

# Configure the OpenAI client with your custom API endpoint and API key.
client = OpenAI(base_url="http://home.mayhew.cloud:1234/v1", api_key="lm-studio")
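# Note: LM Studio's local server exposes an OpenAI-compatible API and does not
# validate the key, so a placeholder string (here "lm-studio") is accepted.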
# UI text and styling
SYSTEM_PROMPT = "You are an assistant."

DESCRIPTION = '''
<div>
  <h1 style="text-align: center;">HealthAssistant</h1>
</div>
'''

LICENSE = "<p></p>"

PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
  <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">The "Doctor" is in.</h1>
  <p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Available 1:00pm - 5:00pm EST</p>
</div>
"""

css = """
h1 {
  text-align: center;
  display: block;
}
#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}
"""
def chat_with_openai(message: str, history: list, temperature: float, max_new_tokens: int):
    """
    Call the OpenAI-compatible chat completions endpoint and yield streaming responses.

    Implements <think> logic:
      - The assistant is prompted to begin its answer with "<think> ".
      - Tokens are buffered until a closing "</think>" marker is received.
      - Only text after "</think>" is displayed as the final answer.

    Args:
        message (str): The latest user message.
        history (list): Conversation history as a list of (user, assistant) tuples.
        temperature (float): Sampling temperature.
        max_new_tokens (int): Maximum tokens to generate.

    Yields:
        str: Partial cumulative output from the assistant.
    """
    # Always include the system prompt (and an initial assistant confirmation)
    # so it applies on every turn, not just the first; the (user, assistant)
    # history tuples never carry it forward on their own.
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "assistant", "content": "Understood!"},
    ]
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})
    # Force the model to begin its answer with a "<think>" block.
    conversation.append({"role": "assistant", "content": "<think> "})
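    # Caveat (assumption about the backend): whether a trailing assistant
    # message is actually continued as a "prefill" is server-dependent; the
    # official OpenAI API starts a fresh assistant turn instead. Reasoning
    # models in the DeepSeek-R1 style emit their own <think>...</think> block
    # regardless, which the parsing below also handles.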
    full_response = ""    # Stores the raw assistant response (including the <think> block).
    buffer = ""           # Accumulates tokens until we detect the closing </think>.
    display_text = ""     # Holds text to display (only text after </think>).
    think_detected = False

    # Immediately yield a "thinking" status message.
    yield "A.I. Healthcare is thinking! Please wait; your response will appear shortly...\n\n"
    # Call the API with streaming enabled.
    response = client.chat.completions.create(
        model="model-identifier",  # Replace with your actual model identifier.
        messages=conversation,
        temperature=temperature,
        max_tokens=max_new_tokens,
        stream=True,
    )
    # Process streaming responses.
    for chunk in response:
        # Extract the new token text from the chunk (None for role-only deltas).
        delta = chunk.choices[0].delta
        token_text = delta.content or ""
        full_response += token_text

        if not think_detected:
            # Accumulate tokens until we see the closing </think> marker.
            buffer += token_text
            if "</think>" in buffer:
                think_detected = True
                # Discard everything up to and including the "</think>" marker.
                display_text = buffer.split("</think>", 1)[1]
                yield display_text
        else:
            display_text += token_text
            yield display_text

    # Append the full (raw) response, including the <think> section, to the
    # conversation history.
    history.append((message, full_response))
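
# A minimal, self-contained sketch (illustrative only; not used by the app) of
# the "</think>" splitting performed in chat_with_openai, run against a
# hypothetical, hard-coded token stream.
def _demo_think_split():
    tokens = ["<think> weighing", " options...", "</think>", "Take", " two."]
    buffer, display_text, think_detected = "", "", False
    for token_text in tokens:
        if not think_detected:
            buffer += token_text
            if "</think>" in buffer:
                think_detected = True
                # Everything after the marker is the displayable answer.
                display_text = buffer.split("</think>", 1)[1]
        else:
            display_text += token_text
    return display_text  # -> "Take two."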
# Create the Chatbot component.
chatbot = gr.Chatbot(height=450, placeholder=PLACEHOLDER, label='HealthAssistant')

# Build the Gradio interface.
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=chat_with_openai,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0.6, label="Temperature", render=False),
            gr.Slider(minimum=1024, maximum=4096, step=128, value=2048, label="Max new tokens", render=False),
        ],
        examples=[
            ['What is, and do I need it?'],
            ['What medications help manage being invisible?'],
            ['How do I know if a clown is the right option?'],
            ['How can I access music in states where it is regulated?'],
        ],
        cache_examples=False,
    )
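    # The sliders above are passed positionally as the extra parameters of
    # chat_with_openai (temperature, max_new_tokens) after message and history.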
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    demo.launch()