import gradio as gr
import requests
import re
import os

API_ENDPOINT = os.getenv("API_ENDPOINT", "none")
API_TOKEN = os.getenv("API_TOKEN")

# Placeholder assistant text shown while a reply is being generated.
PENDING_REPLY = "⏳ Thinking..."


def get_ai_response(message, history):
    """Fetch AI response from the API using the modern messages format.

    Args:
        message: The new user message to answer.
        history: List of (user_msg, ai_msg) tuples. Pairs whose assistant
            side is still the pending placeholder are skipped.

    Returns:
        The assistant reply as HTML (reasoning wrapped in a collapsible
        <details> section), or an "Error: ..." string on failure.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    # Build the API history using all prior complete pairs.
    for user_msg, ai_msg in history:
        if ai_msg != PENDING_REPLY:
            # Strip the rendered collapsible reasoning block and any other
            # leftover HTML before echoing the turn back to the model.
            clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
            clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": clean_ai_msg})
    # Append the new user message for which we want a response.
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
        "messages": messages,
        "stream": False,
        "max_tokens": 10000,
        "temperature": 0.7,
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json",
    }
    try:
        # Timeout keeps a hung endpoint from blocking the UI handler forever.
        response = requests.post(API_ENDPOINT, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        raw_response = response.json()["choices"][0]["message"]["content"]
        return convert_reasoning_to_collapsible(raw_response)
    except Exception as e:
        # Surface the failure in the chat instead of crashing the handler.
        return f"Error: {str(e)}"


def convert_reasoning_to_collapsible(text):
    """Convert <think>...</think> reasoning tags to collapsible HTML sections.

    DeepHermes emits its chain-of-thought inside <think> tags; this wraps
    that content in a <details> block (styled by the page CSS) so the user
    can expand it on demand. Unmatched think tags are removed.
    """
    reasoning_pattern = re.compile(r'<think>(.*?)</think>', re.DOTALL)

    def replace_with_collapsible(match):
        reasoning_content = match.group(1).strip()
        return (
            '<details><summary>See reasoning</summary>'
            f'<div class="reasoning-content">{reasoning_content}</div></details>'
        )

    html_response = reasoning_pattern.sub(replace_with_collapsible, text)
    # Drop any leftover/unpaired think tags so they never reach the UI.
    html_response = re.sub(r'<think>.*?</think>', '', html_response, flags=re.DOTALL)
    html_response = html_response.replace('<think>', '').replace('</think>', '')
    return html_response


def add_user_message(message, history):
    """Immediately add the user's message with a pending assistant reply.

    Returns the updated history twice: once for the state component and
    once for the chatbot display.
    """
    if history is None:
        history = []
    history.append((message, PENDING_REPLY))
    # Return both updated state and chatbot messages.
    return history, history


def generate_response_from_history(history):
    """Generate the assistant's reply and update the last pending message."""
    if not history:
        return history, history
    # The last entry pairs the newest user message with the placeholder.
    last_user_message = history[-1][0]
    # Pass the (user, ai) tuple history straight through: get_ai_response
    # skips pending pairs itself, so the placeholder entry is ignored.
    # (Previously a list of role dicts was built here and handed to
    # get_ai_response, which unpacks tuples — iterating those dicts yielded
    # the key strings "role"/"content" and corrupted the context.)
    ai_response = get_ai_response(last_user_message, history)
    history[-1] = (last_user_message, ai_response)
    return history, history


# Modern CSS for a clean UI
custom_css = """
body {
    background-color: #1a1a1a;
    color: #ffffff;
    font-family: 'Arial', sans-serif;
}
#chatbot {
    height: 80vh;
    background-color: #2d2d2d;
    border: 1px solid #404040;
    border-radius: 8px;
}
input, button {
    background-color: #333333;
    color: #ffffff;
    border: 1px solid #404040;
    border-radius: 5px;
}
button:hover {
    background-color: #404040;
}
details {
    background-color: #333333;
    padding: 10px;
    margin: 5px 0;
    border-radius: 5px;
}
summary {
    cursor: pointer;
    color: #70a9e6;
}
.reasoning-content {
    padding: 10px;
    margin-top: 5px;
    background-color: #404040;
    border-radius: 5px;
}
"""

with gr.Blocks(css=custom_css, title="DeepHermes 3 Llama 3 8B Preview Demo") as demo:
    with gr.Column():
        gr.Markdown("## DeepHermes 3 Llama 3 8B Preview Demo")
        gr.Markdown("")
        chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
        with gr.Row():
            message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
            # Make the button larger by using size "lg"
            submit_btn = gr.Button("Send", size="lg")
        clear_chat_btn = gr.Button("Clear Chat")

    # State management for chat history
    chat_state = gr.State([])

    # Client-side hook: re-parse each finished message's text as HTML so the
    # <details> reasoning blocks actually render inside the chatbot.
    js = """
    function() {
        const observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.addedNodes.length) {
                    document.querySelectorAll('#chatbot .message:not(.processed)').forEach(msg => {
                        msg.classList.add('processed');
                        const content = msg.querySelector('.content');
                        if (content) {
                            content.innerHTML = content.textContent;
                        }
                    });
                }
            });
        });
        const chatbot = document.getElementById('chatbot');
        if (chatbot) {
            observer.observe(chatbot, { childList: true, subtree: true });
        }
        return [];
    }
    """

    # First, add the user message with a pending reply, then update it with
    # the actual response.
    submit_btn.click(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",  # Clear the input box after processing
        None,
        message
    )

    # Enable pressing Enter to submit
    message.submit(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",
        None,
        message
    )

    clear_chat_btn.click(
        lambda: ([], []),
        None,
        [chat_state, chatbot]
    )

    # Load JavaScript to enable HTML rendering in chatbot messages
    demo.load(
        fn=lambda: None,
        inputs=None,
        outputs=None,
        js=js
    )

demo.queue()
demo.launch()