# gemma-7b-dolly-chatml / inference.py
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline

peft_model_id = "philschmid/gemma-7b-dolly-chatml"
# Load tokenizer and model with the PEFT adapter
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
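# device_map="auto" places the weights on the available GPU(s)/CPU; float16 halves memory use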
model = AutoPeftModelForCausalLM.from_pretrained(peft_model_id, device_map="auto", torch_dtype=torch.float16)
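# Optional (assumption, not part of the original script): merge the LoRA adapter into
# the base weights for faster inference once adapter swapping is no longer needed.
# model = model.merge_and_unload()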
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Run inference: first example (capital of Germany)
messages = [
    {
        "role": "user",
        "content": "What is the capital of Germany? Explain why that's the case and if it was different in the past?"
    }
]
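# Render the chat messages with the model's chat (ChatML) template; add_generation_prompt=True appends the assistant turn header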
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
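# Sample up to 1024 new tokens with temperature, top-k, and nucleus (top-p) sampling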
outputs = pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, pad_token_id=pipe.tokenizer.pad_token_id, eos_token_id=pipe.tokenizer.eos_token_id)
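# "generated_text" contains the prompt followed by the model's completion (return_full_text defaults to True)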
print(outputs[0]["generated_text"])

# Run inference: second example (percentage word problem)
messages = [
    {
        "role": "user",
        "content": "In a town, 60% of the population are adults. Among the adults, 30% have a pet dog and 40% have a pet cat. What percentage of the total population has a pet dog?"
    }
]
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipe(prompt, max_new_tokens=1024, do_sample=True, temperature=0.7, top_k=50, top_p=0.95, pad_token_id=pipe.tokenizer.pad_token_id, eos_token_id=pipe.tokenizer.eos_token_id)
print(outputs[0]["generated_text"])

# Check installed library versions:
# pip3 list | grep -e transformers -e peft -e torch -e trl -e accelerate