Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,959 Bytes
e10040f 6d32964 e10040f 651f3a2 e10040f 651f3a2 e10040f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import json
from globe import title, description, customtool
model_path = "nvidia/Nemotron-Mini-4B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
# Create a pipeline
pipe = pipeline("text-generation", model=model_path)
pipe.tokenizer = tokenizer # Assign tokenizer manually
def create_prompt(system_message, user_message, tool_definition=""):
if tool_definition:
return f"""<extra_id_0>System
{system_message}
<tool>
{tool_definition}
</tool>
<context>
The current date is 2023-06-01.
</context>
<extra_id_1>User
{user_message}
<extra_id_1>Assistant
"""
else:
return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition=""):
full_prompt = create_prompt(system_message, message, tool_definition)
if use_pipeline:
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": message},
]
response = pipe(messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)[0]['generated_text']
else:
tokenized_chat = tokenizer.apply_chat_template(
[
{"role": "system", "content": system_message},
{"role": "user", "content": message},
],
tokenize=True,
add_generation_prompt=True,
return_tensors="pt"
)
with torch.no_grad():
output_ids = model.generate(
tokenized_chat,
max_new_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
do_sample=True
)
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
assistant_response = response.split("<extra_id_1>Assistant\n")[-1].strip()
if tool_definition and "<toolcall>" in assistant_response:
tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
assistant_response += f"\n\nTool Call: {tool_call}\n\nNote: This is a simulated tool call. In a real scenario, the tool would be executed and its output would be used to generate a final response."
return assistant_response
with gr.Blocks() as demo:
gr.Markdown("# π€ Nemotron-Mini-4B-Instruct Demo with Custom Function Calling")
gr.Markdown("This demo showcases the Nemotron-Mini-4B-Instruct model from NVIDIA, including optional custom function calling.")
with gr.Row():
with gr.Column(scale=3):
chatbot = gr.Chatbot(height=400)
msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
clear = gr.Button("Clear")
with gr.Column(scale=2):
system_message = gr.Textbox(
label="System Message",
value="You are a helpful AI assistant.",
lines=2,
placeholder="Set the AI's behavior and context..."
)
max_tokens = gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens")
temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
use_tool = gr.Checkbox(label="Use Function Calling", value=False)
with gr.Column(visible=False) as tool_options:
tool_definition = gr.Code(
label="Tool Definition (JSON)",
value=customtool,
lines=15,
language="json"
# placeholder="Enter the JSON definition of your custom tool..."
)
def user(user_message, history):
return "", history + [[user_message, None]]
def bot(history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition):
user_message = history[-1][0]
bot_message = generate_response(user_message, history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition)
history[-1][1] = bot_message
return history
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
bot, [chatbot, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition], chatbot
)
clear.click(lambda: None, None, chatbot, queue=False)
use_tool.change(
fn=lambda x: gr.update(visible=x),
inputs=[use_tool],
outputs=[tool_options]
)
if __name__ == "__main__":
demo.launch() |