from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
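
# Gradio demo for the KatzBot Phi-2 chat model (deepapaikar/katzbot-phi2).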

# Pick the GPU when available; fall back to CPU otherwise. (The original
# unconditional torch.set_default_device("cuda") would break CPU-only hosts
# and is redundant given the explicit device handling below.)
device = "cuda" if torch.cuda.is_available() else "cpu"

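# `device` routes the input tensors; device_map="auto" below lets accelerate
# place the model weights (the two can differ on multi-GPU hosts).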
model = AutoModelForCausalLM.from_pretrained(
    "deepapaikar/katzbot-phi2",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,  # float16 is poorly supported on CPU
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained("deepapaikar/katzbot-phi2", trust_remote_code=True)

# Phi-2 tokenizers ship without a pad token; reuse EOS so padding=True works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model.eval()  # inference only; set once at load time instead of per request
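
# Note: float16 halves weight memory relative to float32; Phi-2's ~2.7B
# parameters occupy roughly 5-6 GB of GPU memory at half precision.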


def predict_answer(question, token=25):
    # `token` caps how many new tokens are sampled for the answer.
    messages = [{"role": "user", "content": f"{question}"}]

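    # apply_chat_template renders the messages with the checkpoint's chat
    # template (tokenize=False returns the prompt as a string), and
    # add_generation_prompt=True appends the assistant header so the model
    # continues with the answer.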
    prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    inputs = tokenizer(prompt_text, return_tensors="pt", padding=True, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}  # move inputs to the model's device

    output_sequences = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=token,  # same budget as the old max_length = prompt length + token
        do_sample=True,  # sample rather than greedy-decode
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        pad_token_id=tokenizer.pad_token_id,  # avoids the missing-pad-token warning
    )

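    # Note: decoding output_sequences[0] yields the prompt plus the answer.
    # A possible refinement (not in the original script) is to slice off the
    # prompt first: output_sequences[0][inputs['input_ids'].shape[-1]:]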
    output_text = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
    return output_text
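
# Quick smoke test (example question taken from the UI placeholder below);
# uncomment to sanity-check generation before launching the interface:
# print(predict_answer("Where is Yeshiva University located?", token=25))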


def gradio_predict(question, token):
    # Thin wrapper so Gradio can pass the textbox and slider values through;
    # coerce the slider value to an int for max_new_tokens.
    answer = predict_answer(question, int(token))
    return answer


iface = gr.Interface(
    fn=gradio_predict,
    inputs=[
        gr.Textbox(label="Question", placeholder="e.g. Where is Yeshiva University located?", scale=4),
        gr.Slider(2, 100, value=25, step=1, label="Token Count", info="Choose between 2 and 100"),
    ],
    outputs=gr.TextArea(label="Answer"),
    title="KatzBot",
    description="Phi2-trial1",
)
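
# launch(debug=True) blocks the process and prints tracebacks to the console.
# Adding share=True would also create a temporary public URL; hosted
# environments such as Hugging Face Spaces serve the app themselves.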
iface.queue().launch(debug=True)