import gradio as gr
import os
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import spaces
# Check if we're running in a Hugging Face Space with GPU constraints
IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
IS_SPACE = os.environ.get("SPACE_ID", None) is not None

# Get the Hugging Face token from environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")

# Determine the device (use GPU if available)
device = "cuda" if torch.cuda.is_available() else "cpu"
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"

print(f"Using device: {device}")
print(f"Low memory mode: {LOW_MEMORY}")
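# Note: on a Hugging Face Space these variables would typically be set under
# the Space's Settings -> "Variables and secrets" panel (illustrative values):
#   LOW_MEMORY=1      # opt into low-memory behavior on small hardware
#   HF_TOKEN=hf_...   # stored as a secret; only needed for gated/private models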
# Model configuration
load_in_4bit = True  # 4-bit quantization keeps the memory footprint low

# Load model and tokenizer, letting accelerate place the weights via device_map
model_name = "nafisneehal/chandler_bot"
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=load_in_4bit,
    device_map="auto",
    token=HF_TOKEN,
)
if not load_in_4bit:
    # 4-bit (bitsandbytes) weights are placed by device_map and raise an
    # error if moved with .to(); only relocate the model when not quantized
    model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
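# Passing load_in_4bit directly to from_pretrained is deprecated in recent
# transformers releases; a minimal sketch of the newer BitsAndBytesConfig API,
# assuming bitsandbytes is installed (illustrative alternative, not active here):
#
#   from transformers import BitsAndBytesConfig
#   bnb_config = BitsAndBytesConfig(
#       load_in_4bit=True,
#       bnb_4bit_compute_dtype=torch.float16,
#   )
#   model = AutoPeftModelForCausalLM.from_pretrained(
#       model_name, quantization_config=bnb_config, device_map="auto"
#   )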
# Define the prompt structure (update as needed for your model)
alpaca_prompt = "{instruction} {input_text} {output}"
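# The variable name suggests the model was fine-tuned on an Alpaca-style
# template; if so, the canonical format looks like the sketch below
# (an assumption -- match whatever template was used at training time):
#
#   alpaca_prompt = (
#       "Below is an instruction that describes a task, paired with an input "
#       "that provides further context. Write a response that appropriately "
#       "completes the request.\n\n"
#       "### Instruction:\n{instruction}\n\n"
#       "### Input:\n{input_text}\n\n"
#       "### Response:\n{output}"
#   )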
# Use the GPU provided by Hugging Face Spaces if available; @spaces.GPU
# requests a GPU per call on ZeroGPU Spaces and is a no-op elsewhere
@spaces.GPU
def generate_response(user_input, chat_history):
    instruction = "Chat with me like Chandler talks."
    input_text = user_input  # treat the user message as the prompt input

    # Format the input using the prompt template
    formatted_input = alpaca_prompt.format(
        instruction=instruction, input_text=input_text, output="")

    # Prepare inputs for model inference on the correct device
    inputs = tokenizer([formatted_input], return_tensors="pt").to(device)

    # Generate a response on GPU or CPU as appropriate
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)

    # Decode only the newly generated tokens so the prompt is not echoed back
    prompt_length = inputs["input_ids"].shape[-1]
    bot_reply = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True).strip()

    # Append the (user message, bot reply) pair that gr.Chatbot expects
    chat_history.append((user_input, bot_reply))
    return chat_history, ""  # return updated history and clear the input box
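# Quick sanity check outside the UI (illustrative; the reply text will vary):
#
#   history, cleared = generate_response("How you doin'?", [])
#   print(history[-1])   # ("How you doin'?", "<model reply>")
#   assert cleared == ""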
# Set up the Gradio interface; custom CSS aligns chat labels on the left
custom_css = """
#chatbox .bot, #chatbox .user {
    text-align: left;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("# Chandler-Like Chatbot on GPU")

    chat_history = gr.Chatbot(label="Chat History", elem_id="chatbox")
    user_input = gr.Textbox(
        placeholder="Type your message here...", label="Your Message")
    submit_btn = gr.Button("Send")

    # Connect both submit actions (Enter key and button) to the generator
    user_input.submit(generate_response, [user_input, chat_history],
                      [chat_history, user_input])
    submit_btn.click(generate_response, [user_input, chat_history],
                     [chat_history, user_input])

demo.launch()  # serves the app; pass share=True for a public link outside Spaces