Spaces:

ahmedbasemdev
/

FineTunedChatbot

Runtime error

FineTunedChatbot / app.py

Update app.py

3b3c5cf verified 8 months ago

1.7 kB

	import gradio as gr

	# Load your model and tokenizer
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Hub repository holding the fine-tuned chat model and its tokenizer.
	model_name = "ahmedbasemdev/llama-3.2-3b-ChatBot"

	# bitsandbytes 8-bit quantization only works on a CUDA GPU. Requesting it
	# unconditionally makes the load fail on CPU-only hardware, so it is enabled
	# only when CUDA is actually present.
	_load_kwargs = {"device_map": "auto"}  # let accelerate place the weights
	if torch.cuda.is_available():
	    _load_kwargs["load_in_8bit"] = True  # 8-bit weights on the GPU
	    _load_kwargs["torch_dtype"] = torch.float16  # half precision for the rest
	else:
	    # CPU fallback: no quantization. bfloat16 halves memory versus float32.
	    # NOTE(review): switch to torch.float32 if the host's torch build lacks
	    # usable bfloat16 CPU kernels.
	    _load_kwargs["torch_dtype"] = torch.bfloat16

	# Load the model with the device-appropriate settings chosen above.
	model = AutoModelForCausalLM.from_pretrained(model_name, **_load_kwargs)

	# Load the tokenizer that matches the model (also provides the chat template).
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	def single_inference(question):
	    """Generate one chat reply to *question* using the module-level model.

	    The question is wrapped as a single user turn, rendered through the
	    tokenizer's chat template, and passed to ``model.generate``. Only the
	    newly generated tokens (not the prompt) are decoded and returned.

	    Args:
	        question: The user's message as text.

	    Returns:
	        The decoded reply, or a short request to enter a question when the
	        input is empty or contains only whitespace.
	    """
	    # Nothing to answer: skip tokenization and generation entirely.
	    if not question or not question.strip():
	        return "Please enter a question."

	    messages = [{"role": "user", "content": question}]

	    input_ids = tokenizer.apply_chat_template(
	        messages,
	        add_generation_prompt=True,
	        return_tensors="pt",
	    ).to(model.device)

	    # Stop on the regular EOS token or on the end-of-turn marker. The marker
	    # must be spelled exactly "<|eot_id|>": with stray backslashes the lookup
	    # does not match the token. Ids that come back as None are dropped so
	    # generate() never receives an invalid terminator.
	    terminators = [
	        token_id
	        for token_id in (
	            tokenizer.eos_token_id,
	            tokenizer.convert_tokens_to_ids("<|eot_id|>"),
	        )
	        if token_id is not None
	    ]

	    outputs = model.generate(
	        input_ids,
	        # One unpadded prompt, so every position is real input; passing the
	        # mask explicitly means generate() does not have to infer it.
	        attention_mask=torch.ones_like(input_ids),
	        max_new_tokens=256,
	        eos_token_id=terminators,
	        do_sample=True,
	        temperature=0.2,
	    )

	    # The output sequence starts with the prompt; keep only what follows it.
	    response = outputs[0][input_ids.shape[-1]:]
	    return tokenizer.decode(response, skip_special_tokens=True)

	# Gradio UI: one question textbox in, one response textbox out, with
	# single_inference as the handler that turns the former into the latter.
	interface = gr.Interface(
	    title="Chat with Your Model",
	    description="Enter a question, and the model will generate a response.",
	    fn=single_inference,
	    inputs=gr.Textbox(lines=2, placeholder="Ask a question..."),
	    outputs=gr.Textbox(label="Response"),
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()