import json

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

from globe import title, description, customtool, presentation1, presentation2, joinus

model_path = "nvidia/Nemotron-Mini-4B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Build the text-generation pipeline from the already-loaded model and
# tokenizer so the checkpoint is not loaded a second time.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
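# Quick sanity check of the pipeline's chat output shape (illustrative,
# commented out so the Space does not run it at startup):
#
#   out = pipe([{"role": "user", "content": "Hello"}], max_new_tokens=8)
#   print(out[0]["generated_text"][-1]["content"])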
def create_prompt(system_message, user_message, tool_definition="", context=""):
    """Build a single-turn prompt in Nemotron-Mini's native format."""
    if tool_definition:
        return f"""<extra_id_0>System
{system_message}
<tool>
{tool_definition}
</tool>
<context>
{context}
</context>
<extra_id_1>User
{user_message}
<extra_id_1>Assistant
"""
    else:
        return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"
@spaces.GPU  # request a ZeroGPU slot for the duration of the call
def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
    if use_pipeline:
        messages = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": message},
        ]
        # For chat-style input the pipeline returns the full conversation;
        # the assistant's reply is the last message.
        output = pipe(messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)
        assistant_response = output[0]["generated_text"][-1]["content"].strip()
    else:
        # create_prompt embeds the tool definition and context in Nemotron's
        # native prompt format, which the plain chat template would drop.
        full_prompt = create_prompt(system_message, message, tool_definition, context)
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            output_ids = model.generate(
                inputs.input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True
            )
        # Decode only the newly generated tokens, not the echoed prompt.
        response = tokenizer.decode(output_ids[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
        assistant_response = response.strip()
    if tool_definition and "<toolcall>" in assistant_response:
        tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
        assistant_response += f"\n\nTool Call: {tool_call}\n\nNote: This is a simulated tool call. In a real scenario, the tool would be executed and its output would be used to generate a final response."
    return assistant_response
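# Illustrative direct call (the Space itself invokes generate_response via the
# Gradio events wired up below):
#
#   reply = generate_response(
#       "What is the capital of France?", [],
#       system_message="You are a helpful AI assistant.",
#       max_tokens=256, temperature=0.7, top_p=0.95,
#   )
#   print(reply)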
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(title)
    with gr.Row():
        gr.Markdown(description)
    with gr.Row():
        with gr.Group():
            gr.Markdown(presentation1)
        with gr.Group():
            gr.Markdown(presentation2)
    with gr.Row():
        gr.Markdown(joinus)
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
            with gr.Accordion(label="🧪 Advanced Settings", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a helpful AI assistant.",
                    lines=2,
                    placeholder="Set the AI's behavior and context..."
                )
                context = gr.Textbox(
                    label="Context",
                    lines=2,
                    placeholder="Enter additional context information..."
                )
                max_tokens = gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
                use_tool = gr.Checkbox(label="Use Function Calling", value=False)
                with gr.Column(visible=False) as tool_options:
                    tool_definition = gr.Code(
                        label="Tool Definition (JSON)",
                        value=customtool,
                        lines=15,
                        language="json"
                    )
            with gr.Row():
                clear = gr.Button("Clear")
                send = gr.Button("Send")
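    # The actual tool schema shown in the editor comes from globe.customtool.
    # A definition in this style is typically a JSON object of roughly this
    # shape (hypothetical example, not the value shipped in globe.py):
    #
    #   {"name": "get_weather",
    #    "description": "Get the current weather for a city",
    #    "parameters": {"city": {"type": "string", "description": "City name"}}}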
    def user(user_message, history):
        # Append the new user turn with an empty assistant slot for bot() to fill.
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context):
        user_message = history[-1][0]
        bot_message = generate_response(user_message, history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context)
        history[-1][1] = bot_message
        return history
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context], chatbot
    )
    send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Show the tool-definition editor only when function calling is enabled.
    use_tool.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[use_tool],
        outputs=[tool_options]
    )
if __name__ == "__main__":
    demo.launch()