bradnow committed
Commit 0d67078 · 1 Parent(s): a220efd

Add selector for model and do some layout

Files changed (3)
  1. README.md +1 -1
  2. app.py +143 -103
  3. utils.py +81 -0
README.md CHANGED
@@ -11,4 +11,4 @@ license: mit
 short_description: ServiceNow-AI model chat
 ---
 
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+A chatbot for ServiceNow-AI model chat. This is a demo of the Apriel Nemotron Chat model. The chatbot can answer questions, provide information, etc.
app.py CHANGED
@@ -1,99 +1,66 @@
-import os
-import sys
 import datetime
 
 from openai import OpenAI
 import gradio as gr
-from gradio.components.chatbot import ChatMessage, Message
-from typing import (
-    Any,
-    Literal,
-)
 
-DEBUG_LOG = False or os.environ.get("DEBUG_LOG") == "True"
+from utils import COMMUNITY_POSTFIX_URL, get_model_config, log_message, check_format, models_config
 
 print(f"Gradio version: {gr.__version__}")
 
-title = None  # "ServiceNow-AI Chat" # modelConfig.get('MODE_DISPLAY_NAME')
-description = "Please use the community section on this space to provide feedback! <a href=\"https://huggingface.co/ServiceNow-AI/Apriel-Nemotron-15b-Thinker/discussions\">ServiceNow-AI/Apriel-Nemotron-Chat</a>"
+DEFAULT_MODEL_NAME = "Apriel-Nemotron-15b-Thinker"
 
 chat_start_count = 0
+model_config = None
+client = None
 
-model_config = {
-    "MODEL_NAME": os.environ.get("MODEL_NAME"),
-    "MODE_DISPLAY_NAME": os.environ.get("MODE_DISPLAY_NAME"),
-    "MODEL_HF_URL": os.environ.get("MODEL_HF_URL"),
-    "VLLM_API_URL": os.environ.get("VLLM_API_URL"),
-    "AUTH_TOKEN": os.environ.get("AUTH_TOKEN")
-}
-
-# Initialize the OpenAI client with the vLLM API URL and token
-client = OpenAI(
-    api_key=model_config.get('AUTH_TOKEN'),
-    base_url=model_config.get('VLLM_API_URL')
-)
-
-
-def log_message(message):
-    if DEBUG_LOG is True:
-        print(message)
-
-
-# Gradio 5.0.1 had issues with checking the message formats. 5.29.0 does not!
-def _check_format(messages: Any, type: Literal["messages", "tuples"] = "messages") -> None:
-    if type == "messages":
-        all_valid = all(
-            isinstance(message, dict)
-            and "role" in message
-            and "content" in message
-            or isinstance(message, ChatMessage | Message)
-            for message in messages
-        )
-        if not all_valid:
-            # Display which message is not valid
-            for i, message in enumerate(messages):
-                if not (isinstance(message, dict) and
-                        "role" in message and
-                        "content" in message) and not isinstance(message, ChatMessage | Message):
-                    print(f"_check_format() --> Invalid message at index {i}: {message}\n", file=sys.stderr)
-                    break
-
-            raise Exception(
-                "Data incompatible with messages format. Each message should be a dictionary with 'role' and 'content' keys or a ChatMessage object."
-            )
-        # else:
-        #     print("_check_format() --> All messages are valid.")
-    elif not all(
-        isinstance(message, (tuple, list)) and len(message) == 2
-        for message in messages
-    ):
-        raise Exception(
-            "Data incompatible with tuples format. Each message should be a list of length 2."
-        )
+
+def setup_model(model_name, intial=False):
+    global model_config, client
+    model_config = get_model_config(model_name)
+    log_message(f"update_model() --> Model config: {model_config}")
+    client = OpenAI(
+        api_key=model_config.get('AUTH_TOKEN'),
+        base_url=model_config.get('VLLM_API_URL')
+    )
+
+    _model_hf_name = model_config.get("MODEL_HF_URL").split('https://huggingface.co/')[1]
+    _link = f"<a href='{model_config.get('MODEL_HF_URL')}{COMMUNITY_POSTFIX_URL}' target='_blank'>{_model_hf_name}</a>"
+    _description = f"Please use the community section on this space to provide feedback! {_link}"
+
+    print(f"Switched to model {_model_hf_name}")
+
+    if intial:
+        return
+    else:
+        return _description
 
 
 def chat_fn(message, history):
-    log_message(f"{'-' * 80}\nchat_fn() --> Message: {message}")
+    log_message(f"{'-' * 80}")
+    log_message(f"chat_fn() --> Message: {message}")
+    log_message(f"chat_fn() --> History: {history}")
 
     global chat_start_count
     chat_start_count = chat_start_count + 1
     print(
         f"{datetime.datetime.now()}: chat_start_count: {chat_start_count}, turns: {int(len(history if history else []) / 3)}")
 
+    is_reasoning = model_config.get("REASONING")
+
     # Remove any assistant messages with metadata from history for multiple turns
     log_message(f"Original History: {history}")
-    _check_format(history, "messages")
+    check_format(history, "messages")
     history = [item for item in history if
                not (isinstance(item, dict) and
                     item.get("role") == "assistant" and
                     isinstance(item.get("metadata"), dict) and
                     item.get("metadata", {}).get("title") is not None)]
     log_message(f"Updated History: {history}")
-    _check_format(history, "messages")
+    check_format(history, "messages")
 
     history.append({"role": "user", "content": message})
     log_message(f"History with user message: {history}")
-    _check_format(history, "messages")
+    check_format(history, "messages")
 
     # Create the streaming response
     stream = client.chat.completions.create(
@@ -103,13 +70,14 @@ def chat_fn(message, history):
         stream=True
     )
 
-    history.append(gr.ChatMessage(
-        role="assistant",
-        content="Thinking...",
-        metadata={"title": "🧠 Thought"}
-    ))
-    log_message(f"History added thinking: {history}")
-    _check_format(history, "messages")
+    if is_reasoning:
+        history.append(gr.ChatMessage(
+            role="assistant",
+            content="Thinking...",
+            metadata={"title": "🧠 Thought"}
+        ))
+        log_message(f"History added thinking: {history}")
+        check_format(history, "messages")
 
     output = ""
     completion_started = False
@@ -118,49 +86,121 @@
         content = getattr(chunk.choices[0].delta, "content", "")
         output += content
 
-        parts = output.split("[BEGIN FINAL RESPONSE]")
+        if is_reasoning:
+            parts = output.split("[BEGIN FINAL RESPONSE]")
 
-        if len(parts) > 1:
-            if parts[1].endswith("[END FINAL RESPONSE]"):
-                parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
-            if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
-                parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
+            if len(parts) > 1:
+                if parts[1].endswith("[END FINAL RESPONSE]"):
+                    parts[1] = parts[1].replace("[END FINAL RESPONSE]", "")
+                if parts[1].endswith("[END FINAL RESPONSE]\n<|end|>"):
+                    parts[1] = parts[1].replace("[END FINAL RESPONSE]\n<|end|>", "")
+                if parts[1].endswith("<|end|>"):
+                    parts[1] = parts[1].replace("<|end|>", "")
 
-        history[-1 if not completion_started else -2] = gr.ChatMessage(
-            role="assistant",
-            content=parts[0],
-            metadata={"title": "🧠 Thought"}
-        )
-        if completion_started:
-            history[-1] = gr.ChatMessage(
+            history[-1 if not completion_started else -2] = gr.ChatMessage(
                 role="assistant",
-                content=parts[1]
+                content=parts[0],
+                metadata={"title": "🧠 Thought"}
             )
-        elif len(parts) > 1 and not completion_started:
-            completion_started = True
-            history.append(gr.ChatMessage(
+            if completion_started:
+                history[-1] = gr.ChatMessage(
+                    role="assistant",
+                    content=parts[1]
+                )
+            elif len(parts) > 1 and not completion_started:
+                completion_started = True
+                history.append(gr.ChatMessage(
+                    role="assistant",
+                    content=parts[1]
+                ))
+        else:
+            if output.endswith("<|end|>"):
+                output = output.replace("<|end|>", "")
+            history[-1] = gr.ChatMessage(
                 role="assistant",
-                content=parts[1]
-            ))
+                content=output
+            )
 
         # only yield the most recent assistant messages
         messages_to_yield = history[-1:] if not completion_started else history[-2:]
-        # _check_format(messages_to_yield, "messages")
+        # check_format(messages_to_yield, "messages")
+        # log_message(f"Yielding messages: {messages_to_yield}")
        yield messages_to_yield
 
     log_message(f"Final History: {history}")
-    _check_format(history, "messages")
+    check_format(history, "messages")
+
+
+title = None
+description = None
+
+with gr.Blocks(theme=gr.themes.Default(primary_hue="green")) as demo:
+    gr.HTML("""
+    <style>
+        .model-message {
+            text-align: end;
+        }
+
+        .model-dropdown-container {
+            display: flex;
+            align-items: center;
+            gap: 10px;
+            padding: 0;
+        }
+
+        @media (max-width: 800px) {
+            .responsive-row {
+                flex-direction: column;
+            }
+            .model-dropdown-container {
+                flex-direction: column;
+                align-items: flex-start;
+            }
+        }
+    """)
+
+    with gr.Row(variant="panel", elem_classes="responsive-row"):
+        with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
+            model_dropdown = gr.Dropdown(
+                choices=[f"Model: {model}" for model in models_config.keys()],
+                value=f"Model: {DEFAULT_MODEL_NAME}",
+                label=None,
+                interactive=True,
+                container=False,
+                scale=0,
+                min_width=400
+            )
+        with gr.Column(scale=4, min_width=0):
+            description_html = gr.HTML(description, elem_classes="model-message")
 
+    chat_bot = gr.Chatbot(
+        type="messages",
+        height="calc(100vh - 320px)",
+    )
 
-# Add the model display name and Hugging Face URL to the description
-# description = f"### Model: [{MODE_DISPLAY_NAME}]({MODEL_HF_URL})"
+    chat_interface = gr.ChatInterface(
+        chat_fn,
+        description="",
+        type="messages",
+        chatbot=chat_bot
+    )
 
-print(f"Running model {model_config.get('MODE_DISPLAY_NAME')} ({model_config.get('MODEL_NAME')})")
+    # Add this line to ensure the model is reset to default on page reload
+    demo.load(lambda: setup_model(DEFAULT_MODEL_NAME, intial=False), [], [description_html])
+
+
+    def update_model_and_clear(model_name):
+        # Remove the "Model: " prefix to get the actual model name
+        actual_model_name = model_name.replace("Model: ", "")
+        desc = setup_model(actual_model_name)
+        chat_bot.clear()  # Critical line
+        return desc
+
+
+    model_dropdown.change(
+        fn=update_model_and_clear,
+        inputs=[model_dropdown],
+        outputs=[description_html]
+    )
 
-gr.ChatInterface(
-    chat_fn,
-    title=title,
-    description=description,
-    theme=gr.themes.Default(primary_hue="green"),
-    type="messages",
-).launch()
+demo.launch()
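For reasoning models, the new chat_fn accumulates the streamed text and splits it on the "[BEGIN FINAL RESPONSE]" marker so the thought and the final answer render as separate chat messages, stripping trailing "[END FINAL RESPONSE]" and "<|end|>" markers. The helper below is not part of this commit; it is a minimal standalone sketch of that same splitting logic (the function name is invented here), which can be exercised without Gradio or a vLLM endpoint:

def split_reasoning_output(output: str):
    """Return (thought, final_response) for a partially streamed reply.

    final_response is None until "[BEGIN FINAL RESPONSE]" has appeared.
    Trailing end markers are stripped, mirroring the cleanup in chat_fn.
    """
    parts = output.split("[BEGIN FINAL RESPONSE]")
    if len(parts) == 1:
        return parts[0], None

    final = parts[1]
    for marker in ("[END FINAL RESPONSE]\n<|end|>", "[END FINAL RESPONSE]", "<|end|>"):
        if final.endswith(marker):
            final = final[: -len(marker)]
    return parts[0], final


# During streaming: thought only, then thought plus the final answer.
print(split_reasoning_output("Adding the numbers..."))
# -> ('Adding the numbers...', None)
print(split_reasoning_output("Adding the numbers...[BEGIN FINAL RESPONSE]4[END FINAL RESPONSE]"))
# -> ('Adding the numbers...', '4')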
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,81 @@
+import os
+import sys
+from typing import Any, Literal
+
+from gradio import ChatMessage
+from gradio.components.chatbot import Message
+
+COMMUNITY_POSTFIX_URL = "/discussions"
+DEBUG_MODE = False or os.environ.get("DEBUG_MODE") == "True"
+
+models_config = {
+    "Apriel-Nemotron-15b-Thinker": {
+        "MODEL_DISPLAY_NAME": "Apriel-Nemotron-15b-Thinker",
+        "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-Nemotron-15b-Thinker",
+        "MODEL_NAME": os.environ.get("MODEL_NAME_NEMO_15B"),
+        "VLLM_API_URL": os.environ.get("VLLM_API_URL_NEMO_15B"),
+        "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
+        "REASONING": True
+    },
+    "Apriel-5b": {
+        "MODEL_DISPLAY_NAME": "Apriel-5b",
+        "MODEL_HF_URL": "https://huggingface.co/ServiceNow-AI/Apriel-5B-Instruct",
+        "MODEL_NAME": os.environ.get("MODEL_NAME_5B"),
+        "VLLM_API_URL": os.environ.get("VLLM_API_URL_5B"),
+        "AUTH_TOKEN": os.environ.get("AUTH_TOKEN"),
+        "REASONING": False
+    }
+}
+
+
+def get_model_config(model_name: str) -> dict:
+    config = models_config.get(model_name)
+    if not config:
+        raise ValueError(f"Model {model_name} not found in models_config")
+    if not config.get("MODEL_NAME"):
+        raise ValueError(f"Model name not found in config for {model_name}")
+    if not config.get("VLLM_API_URL"):
+        raise ValueError(f"VLLM API URL not found in config for {model_name}")
+
+    return config
+
+
+def log_message(message):
+    if DEBUG_MODE is True:
+        print(f"≫≫≫ {message}")
+
+
+# Gradio 5.0.1 had issues with checking the message formats. 5.29.0 does not!
+def check_format(messages: Any, type: Literal["messages", "tuples"] = "messages") -> None:
+    if not DEBUG_MODE:
+        return
+
+    if type == "messages":
+        all_valid = all(
+            isinstance(message, dict)
+            and "role" in message
+            and "content" in message
+            or isinstance(message, ChatMessage | Message)
+            for message in messages
+        )
+        if not all_valid:
+            # Display which message is not valid
+            for i, message in enumerate(messages):
+                if not (isinstance(message, dict) and
+                        "role" in message and
+                        "content" in message) and not isinstance(message, ChatMessage | Message):
+                    print(f"_check_format() --> Invalid message at index {i}: {message}\n", file=sys.stderr)
+                    break
+
+            raise Exception(
+                "Data incompatible with messages format. Each message should be a dictionary with 'role' and 'content' keys or a ChatMessage object."
+            )
+        # else:
+        #     print("_check_format() --> All messages are valid.")
+    elif not all(
+        isinstance(message, (tuple, list)) and len(message) == 2
+        for message in messages
+    ):
+        raise Exception(
+            "Data incompatible with tuples format. Each message should be a list of length 2."
+        )