|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
import transformers |
|
import torch |
|
import gradio as gr |
|
|
|
# Target precision for the model; bfloat16 halves memory vs. float32.
desired_dtype = torch.bfloat16  # NOTE(review): assigned but never read anywhere below

# Process-wide side effect: every tensor created from here on defaults to
# bfloat16, not just the model weights.
torch.set_default_dtype(torch.bfloat16)

# Hugging Face Hub model id, shared by the model and tokenizer loads below.
checkpoint = "tiiuae/falcon-7b-instruct"

# Load the causal-LM weights. device_map="auto" lets accelerate place layers
# across the available devices, spilling what does not fit into the on-disk
# "offload" folder. trust_remote_code is needed because this checkpoint ships
# custom modeling code on the Hub.
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, device_map="auto", offload_folder="offload", trust_remote_code=True)

# Tokenizer matching the checkpoint (trust_remote_code for the same reason).
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

# Text-generation pipeline wrapping the model + tokenizer; used by
# generate_seqs(). dtype/device settings mirror the load above.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
|
|
|
def format_chat_prompt(message, chat_history, instruction):
    """Serialize the system instruction, prior turns, and the new user
    message into a single newline-delimited prompt string.

    Each history entry is a (user_message, bot_message) pair; the prompt
    ends with a dangling "Assistant:" for the model to complete.
    """
    segments = [f"System:{instruction}"]
    for user_text, bot_text in chat_history:
        segments.append(f"User: {user_text}\nAssistant: {bot_text}")
    segments.append(f"User: {message}\nAssistant:")
    return "\n".join(segments)
|
|
|
def generate_seqs(prompt, max_new_tokens=None, stop_sequence=None, temperature=None):
    """Run one sampled generation for `prompt` through the module-level pipeline.

    Args:
        prompt: full prompt string (system + history + "Assistant:").
        max_new_tokens: cap on generated tokens; when None, fall back to a
            total max_length of 200 (the original behavior).
        stop_sequence: a single string (forwarded to the pipeline) or a
            list/tuple of strings (trimmed from the output post-hoc).
        temperature: sampling temperature; pipeline default when None.

    Returns:
        The generated text, prompt included, cut at the first stop marker.
    """
    gen_kwargs = {
        "truncation": True,
        "do_sample": True,
        "top_k": 10,
        "num_return_sequences": 1,
        "eos_token_id": tokenizer.eos_token_id,
    }
    # Bug fix: the original passed max_length=200 AND max_new_tokens together;
    # transformers treats that as a conflict (max_new_tokens silently wins,
    # making max_length dead). Pass exactly one of them.
    if max_new_tokens is not None:
        gen_kwargs["max_new_tokens"] = max_new_tokens
    else:
        gen_kwargs["max_length"] = 200
    # Only forward temperature when the caller actually set one.
    if temperature is not None:
        gen_kwargs["temperature"] = temperature
    # Bug fix: the pipeline's stop_sequence must be a single string it can
    # tokenize, but respond() passes a list. Forward strings unchanged and
    # handle list/tuple stop markers by trimming the decoded output instead.
    trailing_stops = []
    if isinstance(stop_sequence, str):
        gen_kwargs["stop_sequence"] = stop_sequence
    elif stop_sequence is not None:
        trailing_stops = [s for s in stop_sequence if s]

    output = pipeline(prompt, **gen_kwargs)
    text = output[0]['generated_text']
    # Cut at the first stop marker occurring after the prompt. Searching from
    # len(prompt) assumes the decoded output begins with the prompt verbatim
    # (it may drift slightly if truncation fired — acceptable best-effort).
    for stop in trailing_stops:
        cut = text.find(stop, len(prompt))
        if cut != -1:
            text = text[:cut]
    return text
|
|
|
def respond(message, chat_history, instruction, temperature=0.7):
    """Gradio callback: generate a reply and yield the chat history one
    character at a time to simulate streaming.

    Yields ("", chat_history) pairs — the empty string clears the textbox,
    and chat_history grows by one character of the reply per yield.
    """
    full_prompt = format_chat_prompt(message, chat_history, instruction)
    # Open a fresh turn with an empty assistant slot to fill in below.
    chat_history = chat_history + [[message, ""]]
    # Generation is not actually streamed: the whole completion is produced
    # up front, then the text after the final "Assistant: " is drip-fed.
    completion = generate_seqs(prompt=full_prompt,
                               max_new_tokens=8192,
                               stop_sequence=["\nUser:", "<|endoftext|>"],
                               temperature=temperature).split('Assistant: ')[-1]

    for position, char in enumerate(completion):
        # Drop a single leading space so the bubble doesn't start indented.
        if position == 0 and char.startswith(" "):
            char = char[1:]
        current_turn = list(chat_history.pop(-1))
        current_turn[-1] += char
        chat_history = chat_history + [current_turn]
        yield "", chat_history
|
|
|
# --- UI layout: chat window, prompt box, advanced options, and buttons. ---
with gr.Blocks() as demo:

    # Static header shown above the chat widget.
    gr.Markdown(

    """

    # General purpose chatbot - test & demo app by Srinivas.V..

    ## As this is a free hosted platform (Computing and Memory limitations), you will find it slow and the app may not provide appropriate answers after a few dialogues. Type in your prompt, click/ submit and wait for the resonse before typing in your next prompt.

    """)

    # Conversation display; respond() yields updated histories into it.
    chatbot = gr.Chatbot(height=1000)

    # User input box; cleared (set to "") on every yield from respond().
    msg = gr.Textbox(label="Prompt")

    # Collapsed-by-default controls for the system prompt and sampling temperature.
    with gr.Accordion(label="Advanced options",open=False):

        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")

        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)

    btn = gr.Button("Submit")

    # Resets all four components to their initial values.
    clear = gr.ClearButton(components=[msg, chatbot, system, temperature], value="Clear console")

    # Both the button and pressing Enter in the textbox trigger generation;
    # respond() is a generator, so the chat updates incrementally.
    btn.click(respond, inputs=[msg, chatbot, system, temperature], outputs=[msg, chatbot])

    msg.submit(respond, inputs=[msg, chatbot, system, temperature], outputs=[msg, chatbot])

# Close any leftover Gradio servers from earlier runs, then serve the app.
# Queueing is required for generator callbacks like respond().
gr.close_all()

demo.queue().launch()