Spaces:

mudogruer
/

phi-2-science-QA

Sleeping

App Files Files Community

phi-2-science-QA / app.py

mudogruer

Upload app.py

53dd4da verified about 1 year ago

raw

history blame

2.7 kB

	# -*- coding: utf-8 -*-
	"""Untitled14.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1qm9JqCY6CGVTqzvw3GEAI8BsI_-w1rwP
	"""

	# !pip install -q -U gradio
	# !pip install -q -U torch transformers accelerate einops
	# !pip install -q peft

	# Sample science questions offered as ready-made examples. Each question is
	# wrapped in its own single-element row.
	_example_questions = (
	    "Which organelle carries out the synthesis and packaging of digestive enzymes?",
	    "What is the change in speed of a moving object per unit time?",
	    "What is the formula of carbon tetrafluoride?",
	)
	examples = [[question] for question in _example_questions]

	import gradio as gr
	import torch
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	TextIteratorStreamer,
	pipeline,
	)
	from peft import PeftModel, PeftConfig

	# Hub identifiers: the base checkpoint and the adapter applied on top of it.
	_BASE_CHECKPOINT = "microsoft/phi-2"
	_ADAPTER_CHECKPOINT = "mudogruer/phi-2-SciQ"

	# The tokenizer is taken from the base checkpoint.
	tokenizer = AutoTokenizer.from_pretrained(_BASE_CHECKPOINT, trust_remote_code=True)

	# Adapter configuration; loaded here but not consumed by the code below.
	config = PeftConfig.from_pretrained(_ADAPTER_CHECKPOINT)

	# Load the base causal LM first, then wrap it with the PEFT adapter.
	base_model = AutoModelForCausalLM.from_pretrained(_BASE_CHECKPOINT)
	model = PeftModel.from_pretrained(base_model, _ADAPTER_CHECKPOINT)

	# Text-generation pipeline around the adapted model. The EOS token id is
	# reused as the pad token id.
	_pipeline_options = {
	    "tokenizer": tokenizer,
	    "model": model,
	    "pad_token_id": tokenizer.eos_token_id,
	    "eos_token_id": tokenizer.eos_token_id,
	    "device_map": "cpu",
	}
	phi2 = pipeline("text-generation", **_pipeline_options)

	def generate(message, max_new_tokens):
	    """Answer a user question with the module-level ``phi2`` pipeline.

	    Args:
	        message: The question text supplied by the user.
	        max_new_tokens: Requested cap on the number of generated tokens.
	            ``None`` falls back to 50; non-integer numbers are truncated
	            to ``int``.

	    Returns:
	        The first reply produced after the prompt, with surrounding
	        whitespace stripped.
	    """
	    default_new_tokens = 50
	    total_max_length = 512  # combined prompt + completion budget, in tokens

	    if max_new_tokens is None:
	        max_new_tokens = default_new_tokens
	    # UI widgets may hand over a float; generation needs an integer count.
	    max_new_tokens = int(max_new_tokens)

	    instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
	    final_prompt = f"Instruction: {instruction}\nUser: {message}\nOutput:"

	    # Shrink the request so prompt + completion stays within the budget, but
	    # never ask for fewer than one token: a prompt that already fills the
	    # budget would otherwise yield a zero/negative value and fail generation.
	    input_length = len(tokenizer.encode(final_prompt))
	    remaining_budget = total_max_length - input_length
	    max_new_tokens = max(1, min(max_new_tokens, remaining_budget))

	    # Generate text synchronously
	    response = phi2(final_prompt, max_new_tokens=max_new_tokens)
	    generated_text = response[0]['generated_text']

	    # Strip the prompt by prefix instead of splitting on the last 'Output:',
	    # so a repeated 'Output:' in the completion cannot discard the answer.
	    if generated_text.startswith(final_prompt):
	        completion = generated_text[len(final_prompt):]
	    else:
	        # Prompt not echoed verbatim: fall back to the marker-based split.
	        completion = generated_text.split('Output:')[-1]

	    # Keep only the first reply; drop any follow-up turns the model invents.
	    for turn_marker in ("\nUser:", "\nInstruction:", "\nOutput:"):
	        completion = completion.partition(turn_marker)[0]

	    return completion.strip()

	# Gradio UI: a title, a slider for the generation length, and a
	# question/answer interface wired to generate().
	with gr.Blocks() as demo:
	    _title_markdown = """### Phi-2 Scientific Question Chatbot (Fine-tuned from SciQ dataset)"""
	    gr.Markdown(_title_markdown)

	    tokens_slider = gr.Slider(
	        minimum=8,
	        maximum=128,
	        value=21,
	        label="Maximum new tokens",
	    )

	    chatbot = gr.Interface(
	        fn=generate,
	        inputs=["text", tokens_slider],
	        outputs="text",
	        examples=examples,
	    )

	# Start the app once the layout is fully declared.
	demo.launch(share=True)