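"""Gradio chat demo for DeepHermes-3-Llama-3-8B-Preview.

Sends the conversation to an OpenAI-compatible chat-completions endpoint and
renders the model's <reasoning> blocks as collapsible HTML sections.
"""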
import gradio as gr
import requests
import re
import os

# OpenAI-compatible chat-completions endpoint and bearer token, read from the
# environment. The "none" default makes the request fail fast (and be reported
# in-chat) when API_ENDPOINT is not configured.
API_ENDPOINT = os.getenv("API_ENDPOINT", "none")
API_TOKEN = os.getenv("API_TOKEN")

def get_ai_response(message, history):
    """Fetch an AI response from the API using the chat messages format."""
    messages = [{"role": "system", "content": "You are a helpful assistant."}]

    # Rebuild the conversation from (user, assistant) tuples, skipping the
    # pending placeholder and stripping the collapsible-HTML markup added for
    # display so the model sees plain text.
    for user_msg, ai_msg in history:
        if ai_msg != "⏳ Thinking...":
            clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
            clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": clean_ai_msg})

    messages.append({"role": "user", "content": message})

    payload = {
        "model": "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
        "messages": messages,
        "stream": False,
        "max_tokens": 10000,
        "temperature": 0.7
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json"
    }
    try:
        response = requests.post(API_ENDPOINT, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        # OpenAI-style response: the reply text lives at choices[0].message.content.
        raw_response = response.json()["choices"][0]["message"]["content"]
        return convert_reasoning_to_collapsible(raw_response)
    except Exception as e:
        return f"Error: {str(e)}"

def convert_reasoning_to_collapsible(text):
    """Convert <reasoning> tags to collapsible HTML <details> sections."""
    reasoning_pattern = re.compile(r'<reasoning>(.*?)</reasoning>', re.DOTALL)

    def replace_with_collapsible(match):
        reasoning_content = match.group(1).strip()
        return (
            '<details><summary><strong>See reasoning</strong></summary>'
            f'<div class="reasoning-content">{reasoning_content}</div></details>'
        )

    html_response = reasoning_pattern.sub(replace_with_collapsible, text)
    # Drop <sep> blocks entirely, then remove any stray unmatched <sep> tags.
    html_response = re.sub(r'<sep>.*?</sep>', '', html_response, flags=re.DOTALL)
    html_response = html_response.replace('<sep>', '').replace('</sep>', '')
    return html_response

def add_user_message(message, history):
    """Immediately show the user's message with a '⏳ Thinking...' placeholder reply."""
    if history is None:
        history = []
    history.append((message, "⏳ Thinking..."))
    return history, history

def generate_response_from_history(history):
    """Generate the assistant's reply and fill in the pending placeholder."""
    if not history:
        return history, history

    last_user_message = history[-1][0]
    # Pass the (user, assistant) tuple history straight through: get_ai_response
    # skips the pending "⏳ Thinking..." pair and appends the new user message
    # itself, so no separate message list needs to be built here.
    ai_response = get_ai_response(last_user_message, history)
    history[-1] = (last_user_message, ai_response)
    return history, history

custom_css = """
body { background-color: #1a1a1a; color: #ffffff; font-family: 'Arial', sans-serif; }
#chatbot { height: 80vh; background-color: #2d2d2d; border: 1px solid #404040; border-radius: 8px; }
input, button { background-color: #333333; color: #ffffff; border: 1px solid #404040; border-radius: 5px; }
button:hover { background-color: #404040; }
details { background-color: #333333; padding: 10px; margin: 5px 0; border-radius: 5px; }
summary { cursor: pointer; color: #70a9e6; }
.reasoning-content { padding: 10px; margin-top: 5px; background-color: #404040; border-radius: 5px; }
"""

with gr.Blocks(css=custom_css, title="DeepHermes 3 Llama 3 8B Preview Demo") as demo:
    with gr.Column():
        gr.Markdown("## DeepHermes 3 Llama 3 8B Preview Demo")
        # render_markdown=False makes Gradio show replies as raw text; the
        # MutationObserver installed in demo.load() below re-parses each message
        # so the collapsible <details> HTML actually renders.
        chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
        with gr.Row():
            message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
        submit_btn = gr.Button("Send", size="lg")
        clear_chat_btn = gr.Button("Clear Chat")

    # Conversation state: a list of (user, assistant) tuples.
    chat_state = gr.State([])

    # JavaScript injected on load: re-parse each chatbot message's text as HTML
    # so the <details> reasoning sections render (messages arrive as escaped
    # text because render_markdown=False).
    js = """
    function() {
        const observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.addedNodes.length) {
                    document.querySelectorAll('#chatbot .message:not(.processed)').forEach(msg => {
                        msg.classList.add('processed');
                        const content = msg.querySelector('.content');
                        if (content) {
                            // Re-interpret the escaped message text as HTML.
                            content.innerHTML = content.textContent;
                        }
                    });
                }
            });
        });
        const chatbot = document.getElementById('chatbot');
        if (chatbot) {
            observer.observe(chatbot, { childList: true, subtree: true });
        }
        return [];
    }
    """

    # Send button: show the user message immediately, then generate the reply,
    # then clear the textbox.
    submit_btn.click(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",
        None,
        message
    )

    # Pressing Enter in the textbox mirrors the Send button.
    message.submit(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",
        None,
        message
    )

    clear_chat_btn.click(
        lambda: ([], []),
        None,
        [chat_state, chatbot]
    )

    # Install the MutationObserver once the UI has loaded.
    demo.load(
        fn=lambda: None,
        inputs=None,
        outputs=None,
        js=js
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch()
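
# Usage note: any OpenAI-compatible chat-completions server should work; the
# endpoint URL here is illustrative, not a real service.
#   API_ENDPOINT="https://api.example.com/v1/chat/completions" API_TOKEN="..." python app.py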