Spaces:

kalekarnn
/

fine-tuned-phi-2-model

Sleeping

App Files Files Community

fine-tuned-phi-2-model / app.py

kalekarnn

Update app.py

d2e30f5 verified 4 months ago

raw

history blame contribute delete

2.43 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	from peft import PeftModel
	import torch

	# Checkpoint id shared by the tokenizer and the base model weights.
	model_name = "microsoft/phi-2"

	# Tokenizer first: it does not depend on the model weights.
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

	# Base causal LM. The {"": "cpu"} device map places every module on the CPU.
	base_model = AutoModelForCausalLM.from_pretrained(
	    model_name,
	    trust_remote_code=True,
	    device_map={"": "cpu"},
	)

	# Wrap the base model with the fine-tuned adapter stored under
	# "phi2-finetuned-final", again keeping everything on the CPU.
	model = PeftModel.from_pretrained(
	    base_model,
	    "phi2-finetuned-final",
	    device_map={"": "cpu"},
	)

	def generate_response(message, history):
	    """Produce the assistant's reply for one chat turn.

	    Builds an "Instruction:/Response:" prompt from the recent history and
	    the new message, samples a continuation from the fine-tuned model, and
	    returns only the newly generated text.

	    Args:
	        message: The user's latest message.
	        history: Earlier exchanges as (user, assistant) pairs; may be empty
	            or None. At most the last 7 are used, and fewer when the prompt
	            would otherwise not leave room for the reply inside the
	            512-token budget.

	    Returns:
	        The decoded continuation with special tokens removed and
	        surrounding whitespace stripped.
	    """
	    max_new_tokens = 96  # cap on the generated reply
	    max_total_tokens = 512  # intended ceiling for prompt + reply
	    prompt_budget = max_total_tokens - max_new_tokens

	    preamble = "You are a helpful AI assistant. Please provide clear and concise responses.\n\n"
	    question = f"Instruction: {message}\nResponse:"

	    # A turn whose reply is missing (None) is rendered with an empty
	    # response instead of the literal text "None".
	    turns = [
	        f"Instruction: {human}\nResponse: {'' if assistant is None else assistant}\n\n"
	        for human, assistant in (history or [])[-7:]
	    ]

	    # generate() ignores `max_length` whenever `max_new_tokens` is given, so
	    # the context ceiling is enforced here instead: drop the oldest
	    # exchanges until the prompt fits. The current message is never cut.
	    while True:
	        prompt = preamble + "".join(turns) + question
	        inputs = tokenizer(prompt, return_tensors="pt")
	        prompt_len = inputs["input_ids"].shape[1]
	        if prompt_len <= prompt_budget or not turns:
	            break
	        del turns[0]

	    # `early_stopping` (beam-search only) and `min_length=1` (counts prompt
	    # tokens, so it is always satisfied) had no effect under sampling and
	    # are intentionally omitted.
	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=max_new_tokens,
	            do_sample=True,
	            temperature=0.6,
	            top_p=0.7,
	            num_return_sequences=1,
	            no_repeat_ngram_size=3,
	            repetition_penalty=1.2,
	            pad_token_id=tokenizer.eos_token_id,
	            eos_token_id=tokenizer.eos_token_id,
	        )

	    # generate() returns prompt + continuation; keep only the new tokens.
	    new_tokens = outputs[0][prompt_len:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	# Create Gradio interface
	# Custom CSS: widen the page, give the chat area a minimum height and
	# enlarge the message font.
	css = """
	.gradio-container {max-width: 1000px !important}
	.chatbot {min-height: 700px !important}
	.chat-message {font-size: 16px !important}
	"""

	# Chat UI wired to generate_response(message, history).
	demo = gr.ChatInterface(
	    generate_response,
	    chatbot=gr.Chatbot(height=700),  # Increased height
	    textbox=gr.Textbox(
	        placeholder="Type your message here...",
	        container=False,
	        # `scale` is a relative integer weight; the previous fractional
	        # value (0.9) is not a valid setting and triggers a Gradio warning.
	        scale=7,
	    ),
	    title="Phi-2 Conversational Assistant",
	    description="A fine-tuned Phi-2 model for conversational AI",
	    theme="soft",
	    css=css,
	    examples=[
	        "Tell me about yourself",
	        "What can you help me with?",
	        "How do you process information?",
	    ],
	)

	if __name__ == "__main__":
	    import os

	    # Share links are not supported on Hugging Face Spaces (the Space URL
	    # is already public), so only request one when running elsewhere.
	    # SPACE_ID is set by the Spaces runtime.
	    running_on_space = os.getenv("SPACE_ID") is not None
	    demo.launch(share=not running_on_space)