Spaces:

ServiceNow-AI
/

Apriel-Chat

Running

App Files Files Community

Apriel-Chat / app.py

bradnow

Improve logging for normal operation

2ce979c 29 days ago

raw

history blame

15 kB

	import datetime
	from uuid import uuid4

	from openai import OpenAI
	import gradio as gr

	from theme import apriel
	from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_config, \
	logged_event_handler, DEBUG_MODEL, log_debug, log_info, log_error
	from log_chat import log_chat

	# Sampling temperature passed to every chat-completion request.
	MODEL_TEMPERATURE = 0.8
	# Width in pixels used for the send/clear button columns and their CSS max-width.
	BUTTON_WIDTH = 160
	# Initial value of the "don't save my chat history" opt-out.
	DEFAULT_OPT_OUT_VALUE = False

	# The debug build swaps in a different default model.
	DEFAULT_MODEL_NAME = "Apriel-5b" if DEBUG_MODEL else "Apriel-Nemotron-15b-Thinker"

	# Reusable component updates that only toggle interactivity.
	BUTTON_ENABLED = gr.update(interactive=True)
	BUTTON_DISABLED = gr.update(interactive=False)
	INPUT_ENABLED = gr.update(interactive=True)
	INPUT_DISABLED = gr.update(interactive=False)
	DROPDOWN_ENABLED = gr.update(interactive=True)
	DROPDOWN_DISABLED = gr.update(interactive=False)

	# Send/Stop share one slot in the layout: the "disabled" variants hide the
	# button (visible=False) instead of making it non-interactive.
	SEND_BUTTON_ENABLED = gr.update(visible=True, interactive=True)
	SEND_BUTTON_DISABLED = gr.update(visible=False, interactive=True)
	STOP_BUTTON_ENABLED = gr.update(visible=True, interactive=True)
	STOP_BUTTON_DISABLED = gr.update(visible=False, interactive=True)

	# Module-wide mutable state: chat counter, active model config and API client.
	chat_start_count = 0
	model_config = {}
	openai_client = None


	def app_loaded(state, request: gr.Request):
	    """Page-load handler: select the default model and tag the session.

	    Stores the request's session hash in ``state['session']`` (falling back
	    to a fresh UUID hex when no request object is supplied) and returns the
	    updated state together with the feedback HTML from ``setup_model``.
	    """
	    feedback_html = setup_model(DEFAULT_MODEL_NAME, intial=False)

	    if request:
	        session_id = request.session_hash
	    else:
	        session_id = uuid4().hex
	    state['session'] = session_id

	    log_debug(f"app_loaded() --> Session: {session_id}")
	    return state, feedback_html


	def update_model_and_clear_chat(model_name):
	    """Switch to the model picked in the dropdown and reset the chat.

	    The dropdown label carries a "Model: " prefix which is removed before
	    the model is configured. Returns the feedback HTML and an empty history.
	    """
	    feedback_html = setup_model(model_name.replace("Model: ", ""))
	    cleared_history = []
	    return feedback_html, cleared_history


	def setup_model(model_name, intial=False):
	    """Load the configuration for ``model_name`` and rebuild the API client.

	    Replaces the module-level ``model_config`` and ``openai_client``.

	    Returns the feedback HTML snippet linking to the model's Hugging Face
	    page, or ``None`` when ``intial`` is truthy.
	    """
	    global model_config, openai_client

	    model_config = get_model_config(model_name)
	    log_debug(f"update_model() --> Model config: {model_config}")

	    openai_client = OpenAI(
	        base_url=model_config.get('VLLM_API_URL'),
	        api_key=model_config.get('AUTH_TOKEN'),
	    )

	    hf_url = model_config.get("MODEL_HF_URL")
	    # Everything after the hub host is used as the link text.
	    hf_repo = hf_url.split('https://huggingface.co/')[1]
	    feedback_link = f"<a href='{hf_url}{COMMUNITY_POSTFIX_URL}' target='_blank'>{hf_repo}</a>"
	    feedback_html = f"We'd love to hear your thoughts on the model. Click here to provide feedback - {feedback_link}"

	    log_debug(f"Switched to model {hf_repo}")

	    return None if intial else feedback_html


	def chat_started():
	    """Lock the controls while a response streams; clear the input box."""
	    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
	    cleared_locked_input = gr.update(value="", interactive=False)
	    return (
	        DROPDOWN_DISABLED,
	        cleared_locked_input,
	        SEND_BUTTON_DISABLED,
	        STOP_BUTTON_ENABLED,
	        BUTTON_DISABLED,
	    )


	def chat_finished():
	    """Restore the controls to their idle state once a chat run has ended."""
	    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
	    return (
	        DROPDOWN_ENABLED,
	        INPUT_ENABLED,
	        SEND_BUTTON_ENABLED,
	        STOP_BUTTON_DISABLED,
	        BUTTON_ENABLED,
	    )


	def stop_chat(state):
	    """Raise the stop flag read by the streaming loop and notify the user."""
	    state.update(stop_flag=True)
	    gr.Info("Chat stopped")
	    return state


	def toggle_opt_out(state, checkbox):
	    """Mirror the opt-out checkbox value into the session state and return it."""
	    state.update(opt_out=checkbox)
	    return state


	def _strip_end_markers(text):
	    """Drop the model's end-of-response markers when ``text`` ends with one."""
	    for marker in ("[END FINAL RESPONSE]", "[END FINAL RESPONSE]\n<|end|>", "<|end|>"):
	        if text.endswith(marker):
	            text = text.replace(marker, "")
	    return text


	def _log_chat_unless_opted_out(state, model_name, message, history, info):
	    """Persist the chat via ``log_chat`` unless the session has opted out."""
	    if state["opt_out"] is not True:
	        log_chat(chat_id=state["chat_id"],
	                 session_id=state["session"],
	                 model_name=model_name,
	                 prompt=message,
	                 history=history,
	                 info=info,
	                 )
	    else:
	        log_info(f"User opted out of chat history. Not logging chat. model: {model_name}")


	def run_chat_inference(history, message, state):
	    """Stream a model reply for ``message`` into the chat history.

	    This is a generator: every ``yield`` is one UI update for the outputs
	    (chatbot, user_input, send_btn, stop_btn, clear_btn, session_state).

	    Args:
	        history: Chat messages so far ("messages" format); mutated in place.
	        message: Text the user submitted.
	        state: Per-session dict; reads ``stop_flag``/``opt_out``/``session``
	            and writes ``is_streaming``/``stop_flag``/``chat_id``.

	    Yields:
	        Tuples of (history, input update, send update, stop update,
	        clear update, state).

	    Unexpected errors raised while streaming propagate to the caller after
	    the ``finally`` bookkeeping (chat logging, flag reset) has run.
	    """
	    global chat_start_count
	    state["is_streaming"] = True
	    state["stop_flag"] = False
	    error = None
	    model_name = model_config.get('MODEL_NAME')

	    # Control updates shared by every yield below
	    # (user_input, send_btn, stop_btn, clear_btn).
	    idle_controls = (INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED)
	    busy_controls = (INPUT_DISABLED, SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)

	    if history is None:
	        history = []

	    # An empty history marks the start of a new conversation.
	    if not history:
	        state["chat_id"] = uuid4().hex

	    if openai_client is None:
	        log_info("Client UI is stale, letting user know to refresh the page")
	        gr.Warning("Client UI is stale, please refresh the page")
	        state["is_streaming"] = False
	        # A generator must yield for the UI to receive anything; a bare
	        # `return <value>` here would silently drop the update.
	        yield (history, *idle_controls, state)
	        return

	    log_debug(f"{'-' * 80}")
	    log_debug(f"chat_fn() --> Message: {message}")
	    log_debug(f"chat_fn() --> History: {history}")

	    try:
	        # Check if the message is empty
	        if not message or not message.strip():
	            gr.Info("Please enter a message before sending")
	            yield (history, *idle_controls, state)
	            return

	        chat_start_count = chat_start_count + 1
	        turns = len(history) // 3
	        log_info(f"chat_start_count: {chat_start_count}, turns: {turns}, "
	                 f"model: {model_name}")

	        is_reasoning = model_config.get("REASONING")

	        log_debug(f"Initial History: {history}")
	        check_format(history, "messages")
	        history.append({"role": "user", "content": message})
	        log_debug(f"History with user message: {history}")
	        check_format(history, "messages")

	        # Create the streaming response
	        try:
	            # Remove any assistant messages with metadata from history for
	            # multiple turns: titled "thought" bubbles are not sent back.
	            history_no_thoughts = [item for item in history if
	                                   not (isinstance(item, dict) and
	                                        item.get("role") == "assistant" and
	                                        isinstance(item.get("metadata"), dict) and
	                                        item.get("metadata", {}).get("title") is not None)]
	            log_debug(f"Updated History: {history_no_thoughts}")
	            check_format(history_no_thoughts, "messages")
	            log_debug(f"history_no_thoughts with user message: {history_no_thoughts}")

	            stream = openai_client.chat.completions.create(
	                model=model_name,
	                messages=history_no_thoughts,
	                temperature=MODEL_TEMPERATURE,
	                stream=True
	            )
	        except Exception as e:
	            log_error(f"Error: {e}")
	            # Setting `error` tells the `finally` block not to log a second time.
	            error = str(e)
	            yield ([{"role": "assistant",
	                     "content": "😔 The model is unavailable at the moment. Please try again later."}],
	                   *idle_controls, state)
	            _log_chat_unless_opted_out(
	                state, model_name, message, history,
	                info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
	                      "stopped": True, "error": str(e)},
	            )
	            return

	        # Placeholder bubble that the streaming loop overwrites chunk by chunk.
	        if is_reasoning:
	            history.append(gr.ChatMessage(
	                role="assistant",
	                content="Thinking...",
	                metadata={"title": "🧠 Thought"}
	            ))
	            log_debug(f"History added thinking: {history}")
	        else:
	            history.append(gr.ChatMessage(
	                role="assistant",
	                content="",
	            ))
	            log_debug(f"History added empty assistant: {history}")
	        check_format(history, "messages")

	        output = ""
	        completion_started = False
	        for chunk in stream:
	            if state["stop_flag"]:
	                log_debug(f"chat_fn() --> Stopping streaming...")
	                break  # Exit the loop if the stop flag is set

	            # Chunks may carry no choices, and `delta.content` may be present
	            # but None; neither may reach the string concatenation below.
	            if not chunk.choices:
	                continue
	            content = getattr(chunk.choices[0].delta, "content", None) or ""
	            output += content

	            if is_reasoning:
	                # Text before the marker is the thought, text after it the answer.
	                parts = output.split("[BEGIN FINAL RESPONSE]")

	                if len(parts) > 1:
	                    parts[1] = _strip_end_markers(parts[1])

	                # Once the answer bubble exists the thought bubble is second to last.
	                history[-1 if not completion_started else -2] = gr.ChatMessage(
	                    role="assistant",
	                    content=parts[0],
	                    metadata={"title": "🧠 Thought"}
	                )
	                if completion_started:
	                    history[-1] = gr.ChatMessage(
	                        role="assistant",
	                        content=parts[1]
	                    )
	                elif len(parts) > 1 and not completion_started:
	                    completion_started = True
	                    history.append(gr.ChatMessage(
	                        role="assistant",
	                        content=parts[1]
	                    ))
	            else:
	                if output.endswith("<|end|>"):
	                    output = output.replace("<|end|>", "")
	                history[-1] = gr.ChatMessage(
	                    role="assistant",
	                    content=output
	                )

	            yield (history, *busy_controls, state)

	        log_debug(f"Final History: {history}")
	        check_format(history, "messages")
	        yield (history, *idle_controls, state)
	    finally:
	        if error is None:
	            log_debug(f"chat_fn() --> Finished streaming. {chat_start_count} chats started.")
	            _log_chat_unless_opted_out(
	                state, model_name, message, history,
	                info={"is_reasoning": model_config.get("REASONING"), "temperature": MODEL_TEMPERATURE,
	                      "stopped": state["stop_flag"]},
	            )
	        state["is_streaming"] = False
	        state["stop_flag"] = False


	log_info(f"Gradio version: {gr.__version__}")

	# Page-level settings; title and description start out unset.
	title, description = None, None
	theme = apriel

	# Read the stylesheet once at import time; it is passed to gr.Blocks.
	with open('styles.css') as f:
	    custom_css = f.read()

	with gr.Blocks(theme=theme, css=custom_css) as demo:
	    # Store session state as a dictionary
	    session_state = gr.State(value={
	        "is_streaming": False,
	        "stop_flag": False,
	        "chat_id": None,
	        "session": None,
	        "opt_out": DEFAULT_OPT_OUT_VALUE,
	    })

	    # Inline stylesheet capping the button columns at BUTTON_WIDTH on wide screens.
	    gr.HTML(f"""
	<style>
	@media (min-width: 1024px) {{
	.send-button-container, .clear-button-container {{
	max-width: {BUTTON_WIDTH}px;
	}}
	}}
	</style>
	""", elem_classes="css-styles")

	    # Top panel: model picker on the left, feedback message on the right.
	    with gr.Row(variant="panel", elem_classes="responsive-row"):
	        with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
	            model_dropdown = gr.Dropdown(
	                choices=[f"Model: {model}" for model in models_config.keys()],
	                value=f"Model: {DEFAULT_MODEL_NAME}",
	                label=None,
	                interactive=True,
	                container=False,
	                scale=0,
	                min_width=400,
	            )
	        with gr.Column(scale=4, min_width=0):
	            feedback_message_html = gr.HTML(description, elem_classes="model-message")

	    chatbot = gr.Chatbot(
	        type="messages",
	        height="calc(100dvh - 310px)",
	        elem_classes="chatbot",
	    )

	    # Input row: text box plus the Send/Stop and New Chat buttons.
	    with gr.Row():
	        with gr.Column(scale=10, min_width=400):
	            with gr.Row():
	                user_input = gr.Textbox(
	                    show_label=False,
	                    placeholder="Type your message here and press Enter",
	                    container=False,
	                )
	        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
	            with gr.Row():
	                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
	                    send_btn = gr.Button("Send", variant="primary")
	                    stop_btn = gr.Button("Stop", variant="cancel", visible=False)
	                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
	                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary")

	    # Opt-out notice and checkbox.
	    with gr.Row():
	        with gr.Column(min_width=400, elem_classes="opt-out-container"):
	            with gr.Row():
	                gr.HTML(
	                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
	                    elem_classes="opt-out-message")
	            with gr.Row():
	                opt_out_checkbox = gr.Checkbox(
	                    label="Don’t save my chat history for improvements or training",
	                    value=DEFAULT_OPT_OUT_VALUE,
	                    elem_classes="opt-out-checkbox",
	                    interactive=True,
	                    container=False,
	                )

	    # Both the Send button and pressing Enter in the text box start a chat.
	    chat_triggers = [send_btn.click, user_input.submit]
	    # Controls that are locked while a response streams and released afterwards.
	    ui_controls = [model_dropdown, user_input, send_btn, stop_btn, clear_btn]

	    gr.on(
	        triggers=chat_triggers,
	        fn=run_chat_inference,  # this generator streams results. do not use logged_event_handler wrapper
	        inputs=[chatbot, user_input, session_state],
	        outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
	        concurrency_limit=4,
	        api_name=False
	    ).then(
	        fn=chat_finished,
	        inputs=None,
	        outputs=ui_controls,
	        queue=False,
	    )

	    # In parallel, disable or update the UI controls
	    gr.on(
	        triggers=chat_triggers,
	        fn=chat_started,
	        inputs=None,
	        outputs=ui_controls,
	        queue=False,
	        show_progress='hidden',
	        api_name=False
	    )

	    stop_btn.click(
	        fn=stop_chat,
	        inputs=[session_state],
	        outputs=[session_state],
	        api_name=False
	    )

	    opt_out_checkbox.change(
	        fn=toggle_opt_out,
	        inputs=[session_state, opt_out_checkbox],
	        outputs=[session_state],
	    )

	    # Ensure the model is reset to default on page reload
	    demo.load(
	        fn=logged_event_handler(
	            log_msg="Browser session started",
	            event_handler=app_loaded
	        ),
	        inputs=[session_state],
	        outputs=[session_state, feedback_message_html],
	        queue=True,
	        api_name=False
	    )

	    # Picking another model swaps the feedback message and clears the chat.
	    model_dropdown.change(
	        fn=update_model_and_clear_chat,
	        inputs=[model_dropdown],
	        outputs=[feedback_message_html, chatbot],
	        api_name=False
	    )

	# Enable the event queue, then start the server on whatever queue() returned.
	queued_demo = demo.queue(default_concurrency_limit=2)
	queued_demo.launch(ssr_mode=False, show_api=False)
	log_info("Gradio app launched")