Spaces:

eddyejembi
/

llama2_chatbot

Runtime error

App Files Files Community

llama2_chatbot / app.py

eddyejembi

Update app.py

f1c9abf almost 2 years ago

raw

history blame contribute delete

2.37 kB

	import os

	import gradio as gr
	import torch
	import transformers
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from transformers import pipeline

	# Checkpoint to serve. The repository is gated, so downloading it needs a
	# Hugging Face access token from an account that has been granted access.
	llama_model = "meta-llama/Llama-2-7b-chat-hf"
	# Read the token from the environment (e.g. a Space secret) rather than
	# hard-coding it in the source. When HF_TOKEN is unset this is None.
	access_token = os.environ.get("HF_TOKEN")

	model = AutoModelForCausalLM.from_pretrained(llama_model, token=access_token)
	tokenizer = AutoTokenizer.from_pretrained(llama_model, token=access_token)

	# The model is passed as an object, so the pipeline cannot look up a matching
	# tokenizer by name; it has to be supplied explicitly.
	# NOTE: this assignment rebinds the name `pipeline` (also imported from
	# transformers above) to the pipeline instance that llama_response() calls.
	# NOTE(review): torch_dtype/device_map may have no effect on a model that is
	# already instantiated - confirm, and consider passing them to
	# from_pretrained() instead if half precision / auto placement is wanted.
	pipeline = transformers.pipeline(
	    "text-generation",
	    model=model,
	    tokenizer=tokenizer,
	    torch_dtype=torch.float16,
	    device_map="auto",
	)

	# System preamble that opens every prompt built by message_format().
	SYSTEM_PROMPT = """<s>[INST] <<SYS>>
	You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

	If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
	<</SYS>>

	"""

	# Formatting function for message and history
	def message_format(message: str, history: list, memory_limit: int = 5) -> str:
	    """Build the chat prompt for ``message`` from the most recent turns.

	    Args:
	        message: The user's new message.
	        history: Earlier turns, oldest first. Each turn is read as
	            ``(user_message, model_answer)``. ``None`` is treated as empty.
	        memory_limit: Maximum number of most-recent turns to include.
	            Zero or a negative value includes no history at all.

	    Returns:
	        ``SYSTEM_PROMPT`` followed by the kept turns and the new message,
	        ending with an open ``[/INST]`` marker.
	    """
	    turns = history or []

	    # Keep only the newest `memory_limit` turns. The limit is checked
	    # explicitly because turns[-0:] would return the whole list, not none.
	    if memory_limit <= 0:
	        turns = []
	    elif len(turns) > memory_limit:
	        turns = turns[-memory_limit:]

	    if not turns:
	        return SYSTEM_PROMPT + f"{message} [/INST]"

	    # The first kept turn continues the [INST] opened inside SYSTEM_PROMPT,
	    # so it is written without its own "<s>[INST]" prefix.
	    first_turn = turns[0]
	    pieces = [SYSTEM_PROMPT, f"{first_turn[0]} [/INST] {first_turn[1]} </s>"]

	    # Handle the rest of the conversation history
	    pieces.extend(
	        f"<s>[INST] {user_msg} [/INST] {model_answer} </s>"
	        for user_msg, model_answer in turns[1:]
	    )

	    # Handle the current message
	    pieces.append(f"<s>[INST] {message} [/INST]")

	    return "".join(pieces)

	# Generate a response from the Llama model
	def llama_response(message: str, history: list) -> str:
	    """Return the model's reply to ``message`` given the chat ``history``.

	    The prompt is built with message_format(), sent through the module-level
	    text-generation ``pipeline``, and the reply is printed before it is
	    returned.

	    Args:
	        message: The user's new message.
	        history: Earlier turns, forwarded unchanged to message_format().

	    Returns:
	        The generated continuation with the prompt removed and surrounding
	        whitespace stripped.
	    """
	    query = message_format(message, history)

	    sequences = pipeline(
	        query,
	        do_sample=True,
	        top_k=10,
	        num_return_sequences=1,
	        eos_token_id=tokenizer.eos_token_id,
	        # Budget the reply only. max_length would also count the prompt
	        # tokens, so a long history could leave no room to generate.
	        max_new_tokens=512,
	    )

	    generated_text = sequences[0]["generated_text"]
	    # Remove the prompt from the output
	    response = generated_text[len(query):].strip()

	    print("Chatbot:", response)
	    return response


	# Wrap llama_response in Gradio's chat UI and start serving it.
	gr.ChatInterface(fn=llama_response).launch()