ElapticAI-1a-chat

Sleeping

App Files Files Community

ElapticAI-1a-chat / app.py

elapt1c

Update app.py

f710225 verified 3 months ago

raw

history blame

4.79 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig, AutoModel
	from huggingface_hub import hf_hub_download
	import os
	import torch.nn as nn

	# ----- Model Definition -----
	class CustomDialoGPT(nn.Module):
	    """Causal language model built from a resized DialoGPT-medium config.

	    The wrapper holds two sub-modules whose attribute names are part of the
	    checkpoint layout and therefore must not be renamed:

	    * ``transformer`` -- an ``AutoModelForCausalLM`` instantiated from config
	      only (no pretrained weights are loaded here).
	    * ``lm_head`` -- a bias-free linear projection from ``n_embd`` to
	      ``vocab_size`` that is applied in :meth:`forward`.

	    Args:
	        vocab_size: Number of rows in the vocabulary / width of the logits.
	        n_embd: Hidden size. Overrides the value in the base config.
	        n_head: Attention heads per layer. Overrides the base config.
	        n_layer: Number of transformer layers. Overrides the base config.
	    """

	    def __init__(self, vocab_size, n_embd=768, n_head=8, n_layer=8):
	        super().__init__()

	        # Start from the published DialoGPT-medium configuration and replace
	        # the size-related fields; the defaults above are NOT taken from that
	        # config -- presumably they mirror the trained checkpoint (verify).
	        overrides = dict(
	            vocab_size=vocab_size,
	            n_embd=n_embd,
	            n_head=n_head,
	            n_layer=n_layer,
	            bos_token_id=50256,
	            eos_token_id=50256,
	            pad_token_id=50256,
	        )
	        backbone_config = AutoConfig.from_pretrained(
	            "microsoft/DialoGPT-medium", **overrides
	        )

	        # Randomly initialised backbone; real weights arrive via load_state_dict.
	        self.transformer = AutoModelForCausalLM.from_config(backbone_config)
	        # Separate projection head used only by forward().
	        self.lm_head = nn.Linear(n_embd, vocab_size, bias=False)

	    def forward(self, input_ids):
	        """Return logits computed by ``lm_head`` on the last hidden states.

	        Args:
	            input_ids: Token ids forwarded unchanged to the backbone.

	        Returns:
	            The output of ``self.lm_head`` applied to the final entry of the
	            backbone's ``hidden_states`` tuple.
	        """
	        backbone_out = self.transformer(
	            input_ids=input_ids, output_hidden_states=True
	        )
	        final_hidden = backbone_out.hidden_states[-1]
	        return self.lm_head(final_hidden)


	# ----- Checkpoint / tokenizer configuration -----
	model_repo = "elapt1c/ElapticAI-1a"
	model_filename = "model.pth"  # must match the checkpoint's filename on the Hub
	tokenizer_name = "microsoft/DialoGPT-medium"

	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# The tokenizer also determines the vocabulary size of the model.
	tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
	vocab_size = len(tokenizer)

	# Architecture sizes are pinned so the module layout matches the checkpoint.
	n_embd = 768  # hidden size
	n_head = 8  # attention heads per layer
	n_layer = 8  # transformer layers
	model = CustomDialoGPT(vocab_size, n_embd, n_head, n_layer)


	# ----- Download and load model weights -----
	try:
	    pth_filepath = hf_hub_download(repo_id=model_repo, filename=model_filename)

	    # SECURITY NOTE(review): torch.load unpickles the downloaded file, which
	    # can execute arbitrary code if the file is not trusted. Consider passing
	    # weights_only=True once it is confirmed that the checkpoint contains
	    # nothing but tensors and plain containers.
	    checkpoint = torch.load(pth_filepath, map_location=device)

	    # The weights may be wrapped under a well-known key or stored as a raw
	    # state_dict; pick whichever is present, in that order of preference.
	    if 'model_state_dict' in checkpoint:
	        state_dict = checkpoint['model_state_dict']
	    elif 'state_dict' in checkpoint:
	        state_dict = checkpoint['state_dict']
	    else:
	        state_dict = checkpoint
	    model.load_state_dict(state_dict)

	    print(f"Successfully loaded model weights from {model_repo}/{model_filename}")
	except Exception as e:
	    print(f"Error loading model: {e}")
	    print("Please ensure the model repository and filename are correct and that the model architecture in app.py matches the checkpoint.")
	    # Re-raise so the failure is visible in the Space logs; a bare `raise`
	    # keeps the original traceback untouched.
	    raise

	model.to(device)
	model.eval()  # inference only: disables dropout and similar training behaviour

	def chat_with_model(user_input, history=None):
	    """Generate the bot's reply to ``user_input`` given the prior turns.

	    Args:
	        user_input: The newest user message.
	        history: List of ``(user_message, bot_message)`` tuples from earlier
	            turns. When a list is supplied it is extended in place with the
	            new turn. When omitted, a fresh empty list is used for this call.

	    Returns:
	        A ``(bot_response, history)`` tuple: the reply text and the history
	        list including the new ``(user_input, bot_response)`` pair.
	    """
	    # A `[]` default would be created once and then mutated by the append
	    # below, leaking one conversation into every later call.
	    if history is None:
	        history = []

	    # DialoGPT-style prompt: every turn -- including the newest user message --
	    # is terminated by EOS so the model sees a turn boundary and starts a new
	    # utterance instead of continuing the user's text.
	    turns = history_to_transformer_format(history) + [user_input]
	    input_text = "".join(turn + tokenizer.eos_token for turn in turns)

	    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)

	    with torch.no_grad():
	        # generate() lives on the wrapped causal LM, not on the nn.Module wrapper.
	        output = model.transformer.generate(
	            inputs=input_ids,
	            max_length=1000,  # total length (prompt + reply); adjust as needed
	            pad_token_id=tokenizer.eos_token_id,
	            do_sample=True,  # temperature/top_p only take effect when sampling
	            temperature=0.7,
	            top_p=0.9,
	        )

	    # The returned sequence starts with the prompt; keep only the tokens that
	    # were generated after it. (Decoding everything with special tokens
	    # skipped leaves no EOS text to split on, so the whole conversation would
	    # be returned as the "reply".)
	    prompt_length = input_ids.shape[-1]
	    reply_ids = output[0][prompt_length:]
	    bot_response = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()

	    history.append((user_input, bot_response))
	    return bot_response, history

	def history_to_transformer_format(history):
	    """Flatten ``(user, bot)`` pairs into one chronological list of messages.

	    Args:
	        history: Iterable of two-item ``(user_msg, bot_msg)`` entries.

	    Returns:
	        A new list ``[user_1, bot_1, user_2, bot_2, ...]``.
	    """
	    return [
	        utterance
	        for user_turn, bot_turn in history
	        for utterance in (user_turn, bot_turn)
	    ]


	def _chat_turn(user_input, history=None):
	    """Adapt ``chat_with_model`` to the interface's (chatbot, state) outputs.

	    ``chat_with_model`` returns ``(reply_text, history)``, but the Chatbot
	    component displays the full list of ``(user, bot)`` pairs, and the
	    per-session state must be handed back to be kept for the next turn.
	    The updated history is therefore returned twice.
	    """
	    # Always pass an explicit list so the callee never relies on its default.
	    session_history = [] if history is None else history
	    _, updated_history = chat_with_model(user_input, session_history)
	    return updated_history, updated_history


	iface = gr.Interface(
	    fn=_chat_turn,
	    inputs=[
	        gr.Textbox(placeholder="Type your message here..."),
	        gr.State(value=[]),  # per-session conversation history
	    ],
	    outputs=[
	        gr.Chatbot(),  # renders the list of (user, bot) pairs
	        gr.State(),  # stores the updated history for the next turn
	    ],
	    title="ElapticAI-1a Chatbot",
	    description="Simple chatbot interface for ElapticAI-1a model. Talk to the model and see its responses!",
	    examples=[
	        ["Hello"],
	        ["How are you?"],
	        ["Tell me a joke"],
	    ],
	    # Do not pre-compute example outputs: that would run the model at startup
	    # and outside of any session state.
	    cache_examples=False,
	)

	if __name__ == "__main__":
	    iface.launch()