File size: 6,715 Bytes
63c9177
0588a72
3ad05c5
d0d60f4
0588a72
 
 
 
 
079c63d
616220d
5ad9b18
 
616220d
5ad9b18
 
 
 
 
 
 
0588a72
0e06728
0588a72
 
de624eb
0588a72
 
 
 
 
 
 
 
 
3ad05c5
 
 
0588a72
 
 
3ad05c5
 
 
 
 
 
 
 
 
 
 
5ad9b18
616220d
5ad9b18
0588a72
616220d
 
 
5ad9b18
 
 
 
616220d
 
5ad9b18
616220d
 
 
 
 
 
 
 
 
 
 
5ad9b18
616220d
995a0e5
079c63d
0588a72
079c63d
d0d60f4
079c63d
 
3ad05c5
 
 
 
 
41f2589
d0d60f4
41f2589
0b81ef5
d0d60f4
 
 
5ad9b18
 
d0d60f4
0588a72
616220d
 
0588a72
3ad05c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa05415
616220d
0588a72
5ad9b18
d0d60f4
5ad9b18
aa05415
5ad9b18
aa05415
5ad9b18
0588a72
616220d
0588a72
 
 
 
616220d
3ad05c5
5ad9b18
d0d60f4
5ad9b18
3ad05c5
5ad9b18
3ad05c5
5ad9b18
3ad05c5
5ad9b18
3ad05c5
 
 
 
0588a72
5ad9b18
0588a72
d0d60f4
0588a72
 
616220d
0588a72
d0d60f4
3ad05c5
 
 
0588a72
 
2a08ae8
69f09c6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import gradio as gr
import requests
import re
import os

API_ENDPOINT = os.getenv("API_ENDPOINT", "none")
API_TOKEN = os.getenv("API_TOKEN")

def get_ai_response(message, history):
    """Fetch an assistant reply from the OpenAI-compatible chat API.

    Parameters
    ----------
    message : str
        The new user message to answer.
    history : list
        Prior conversation.  Accepts either Gradio tuple pairs
        ``[(user, assistant), ...]`` or messages-format dicts
        ``[{"role": ..., "content": ...}, ...]`` — the latter is what
        ``generate_response_from_history`` builds.  (The previous version
        assumed pairs only; unpacking a dict yielded its key strings
        ``("role", "content")`` and silently corrupted the API history.)

    Returns
    -------
    str
        The assistant reply converted to collapsible HTML, or an
        ``"Error: ..."`` string if the request fails.
    """
    messages = [{"role": "system", "content": "You are a helpful assistant."}]
    for entry in history or []:
        if isinstance(entry, dict):
            # Already in messages format: pass through untouched.
            messages.append(entry)
        else:
            user_msg, ai_msg = entry
            # Skip pending placeholders; strip the collapsible HTML we
            # injected before echoing replies back to the model.
            if ai_msg != "⏳ Thinking...":
                clean_ai_msg = re.sub(r'<details>.*?</details>', '', ai_msg, flags=re.DOTALL)
                clean_ai_msg = re.sub(r'<[^>]*>', '', clean_ai_msg)
                messages.append({"role": "user", "content": user_msg})
                messages.append({"role": "assistant", "content": clean_ai_msg})
    # Append the new user message unless the caller's history already ends
    # with it (the old code double-appended in that case).
    if not (len(messages) > 1
            and messages[-1].get("role") == "user"
            and messages[-1].get("content") == message):
        messages.append({"role": "user", "content": message})

    payload = {
        "model": "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
        "messages": messages,
        "stream": False,
        "max_tokens": 10000,
        "temperature": 0.7
    }
    headers = {
        "Authorization": f"Bearer {API_TOKEN}",
        "Content-Type": "application/json"
    }
    try:
        # timeout prevents the UI from hanging forever on a dead endpoint.
        response = requests.post(API_ENDPOINT, headers=headers, json=payload, timeout=120)
        response.raise_for_status()
        raw_response = response.json()["choices"][0]["message"]["content"]
        html_response = convert_reasoning_to_collapsible(raw_response)
        return html_response
    except Exception as e:
        return f"Error: {str(e)}"

def convert_reasoning_to_collapsible(text):
    """Turn <reasoning>…</reasoning> spans into collapsible <details> HTML.

    Any <sep>…</sep> spans (and stray <sep> tags) are removed entirely.
    """
    def _to_details(match):
        inner = match.group(1).strip()
        return (
            '<details><summary><strong>See reasoning</strong></summary>'
            f'<div class="reasoning-content">{inner}</div></details>'
        )

    result = re.sub(r'<reasoning>(.*?)</reasoning>', _to_details, text, flags=re.DOTALL)
    result = re.sub(r'<sep>.*?</sep>', '', result, flags=re.DOTALL)
    result = result.replace('<sep>', '')
    return result.replace('</sep>', '')

def add_user_message(message, history):
    """Append the user's message paired with a '⏳ Thinking...' placeholder.

    Parameters
    ----------
    message : str
        Text from the input box.
    history : list[tuple[str, str]] | None
        Current chat history; ``None`` on first use.

    Returns
    -------
    tuple
        ``(history, history)`` — the same list twice, feeding both the
        ``gr.State`` and the ``gr.Chatbot`` outputs.
    """
    if history is None:
        history = []
    # Ignore empty / whitespace-only submissions instead of queuing a
    # blank request to the API.
    if message and message.strip():
        history.append((message, "⏳ Thinking..."))
    return history, history

def generate_response_from_history(history):
    """Resolve the pending '⏳ Thinking...' entry with a real assistant reply.

    Parameters
    ----------
    history : list[tuple[str, str]]
        Chat history whose last entry is ``(user_message, "⏳ Thinking...")``.

    Returns
    -------
    tuple
        ``(history, history)`` with the last entry replaced by the reply.
    """
    if not history:
        return history, history
    # The most recent entry holds the user message awaiting a reply.
    last_user_message = history[-1][0]
    # Hand the prior tuple pairs straight to get_ai_response, which filters
    # pending placeholders and strips HTML itself.  (The previous version
    # rebuilt a messages-format dict list here and passed it to
    # get_ai_response, which unpacked it as pairs and mis-parsed it.)
    ai_response = get_ai_response(last_user_message, history[:-1])
    history[-1] = (last_user_message, ai_response)
    return history, history

# Dark-theme CSS injected into the Blocks app: styles the chat area, the
# input row, and the collapsible <details>/<summary> reasoning sections
# emitted by convert_reasoning_to_collapsible.
custom_css = """
body { background-color: #1a1a1a; color: #ffffff; font-family: 'Arial', sans-serif; }
#chatbot { height: 80vh; background-color: #2d2d2d; border: 1px solid #404040; border-radius: 8px; }
input, button { background-color: #333333; color: #ffffff; border: 1px solid #404040; border-radius: 5px; }
button:hover { background-color: #404040; }
details { background-color: #333333; padding: 10px; margin: 5px 0; border-radius: 5px; }
summary { cursor: pointer; color: #70a9e6; }
.reasoning-content { padding: 10px; margin-top: 5px; background-color: #404040; border-radius: 5px; }
"""

# --- Gradio UI wiring ------------------------------------------------------
with gr.Blocks(css=custom_css, title="DeepHermes 3 Llama 3 8B Preview Demo") as demo:
    with gr.Column():
        gr.Markdown("## DeepHermes 3 Llama 3 8B Preview Demo")
        gr.Markdown("")
        # render_markdown=False: replies contain raw HTML (<details> blocks);
        # the MutationObserver below re-injects that text as HTML.
        chatbot = gr.Chatbot(elem_id="chatbot", render_markdown=False, bubble_full_width=True)
        with gr.Row():
            message = gr.Textbox(placeholder="Type your message...", show_label=False, container=False)
            # Make the button larger by using size "lg"
            submit_btn = gr.Button("Send", size="lg")
        clear_chat_btn = gr.Button("Clear Chat")

    # State management for chat history: a list of (user, assistant) tuples
    # shared by every event handler below.
    chat_state = gr.State([])

    # Browser-side hook: watches #chatbot for new message nodes and swaps
    # each node's escaped text back in as innerHTML so the <details>
    # reasoning sections render as real HTML.  Runtime JS string — do not
    # reformat.
    js = """
    function() {
        const observer = new MutationObserver(function(mutations) {
            mutations.forEach(function(mutation) {
                if (mutation.addedNodes.length) {
                    document.querySelectorAll('#chatbot .message:not(.processed)').forEach(msg => {
                        msg.classList.add('processed');
                        const content = msg.querySelector('.content');
                        if (content) {
                            content.innerHTML = content.textContent;
                        }
                    });
                }
            });
        });
        const chatbot = document.getElementById('chatbot');
        if (chatbot) {
            observer.observe(chatbot, { childList: true, subtree: true });
        }
        return [];
    }
    """

    # First, add the user message with a pending reply, then update it with the actual response.
    submit_btn.click(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",  # Clear the input box after processing
        None,
        message
    )

    # Enable pressing Enter to submit (same three-step chain as the button).
    message.submit(
        add_user_message,
        [message, chat_state],
        [chat_state, chatbot]
    ).then(
        generate_response_from_history,
        chat_state,
        [chat_state, chatbot]
    ).then(
        lambda: "",
        None,
        message
    )

    # Reset both the stored state and the visible chat pane.
    clear_chat_btn.click(
        lambda: ([], []),
        None,
        [chat_state, chatbot]
    )

    # Load JavaScript to enable HTML rendering in chatbot messages
    demo.load(
        fn=lambda: None,
        inputs=None,
        outputs=None,
        js=js
    )

# queue() enables request queuing for the event chains; launch() starts the server.
demo.queue()
demo.launch()