Spaces:
Running
Running
from typing import List, Tuple, Dict, Generator | |
from transformers import GPT2LMHeadModel, GPT2TokenizerFast | |
import torch | |
import gradio as gr | |
# Load the GPT-2 tokenizer | |
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2") | |
# Load the saved GPT-2 model from the local checkpoint | |
model_path = "DuckyPolice/ElapticAI-1a" # Adjust to your specific model path if needed | |
model = GPT2LMHeadModel.from_pretrained(model_path) | |
# Move model to appropriate device (GPU if available, otherwise CPU) | |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | |
model.to(device) | |
model.eval() | |
def create_history_messages(history: List[Tuple[str, str]]) -> List[dict]: | |
history_messages = [{"role": "user", "content": m[0]} for m in history] | |
history_messages.extend([{"role": "assistant", "content": m[1]} for m in history]) | |
return history_messages | |
def create_formatted_history(history_messages: List[dict]) -> List[Tuple[str, str]]: | |
formatted_history = [] | |
user_messages = [] | |
assistant_messages = [] | |
for message in history_messages: | |
if message["role"] == "user": | |
user_messages.append(message["content"]) | |
elif message["role"] == "assistant": | |
assistant_messages.append(message["content"]) | |
if user_messages and assistant_messages: | |
formatted_history.append( | |
("".join(user_messages), "".join(assistant_messages)) | |
) | |
user_messages = [] | |
assistant_messages = [] | |
# Append any remaining messages | |
if user_messages: | |
formatted_history.append(("".join(user_messages), None)) | |
elif assistant_messages: | |
formatted_history.append((None, "".join(assistant_messages))) | |
return formatted_history | |
def chat(message: str, state: List[Dict[str, str]]) -> Generator[Tuple[List[Tuple[str, str]], List[Dict[str, str]]], None, None]: | |
history_messages = state | |
if history_messages == None: | |
history_messages = [] | |
history_messages.append({"role": "system", "content": "A helpful assistant."}) | |
history_messages.append({"role": "user", "content": message}) | |
history_messages.append({"role": "assistant", "content": ""}) | |
# Tokenize user input and prepare input tensor | |
input_ids = tokenizer.encode(message, return_tensors='pt').to(device) | |
if input_ids.size(-1) == 0: | |
response_message = "Input was empty after tokenization. Please try again." | |
else: | |
# Generate tokens one by one | |
with torch.no_grad(): | |
for _ in range(50): # Limit generation to 50 tokens | |
outputs = model(input_ids) | |
next_token_logits = outputs.logits[:, -1, :] | |
next_token_id = torch.argmax(next_token_logits, dim=-1) | |
input_ids = torch.cat([input_ids, next_token_id.unsqueeze(-1)], dim=-1) | |
# Decode and append the latest token | |
decoded_token = tokenizer.decode(next_token_id) | |
history_messages[-1]["content"] += decoded_token | |
# Stop if the model generates the end-of-sequence token | |
if next_token_id.item() == tokenizer.eos_token_id: | |
break | |
response_message = history_messages[-1]["content"] | |
formatted_history = create_formatted_history(history_messages) | |
yield formatted_history, history_messages | |
chatbot = gr.Chatbot(label="Chat") | |
iface = gr.Interface( | |
fn=chat, | |
inputs=[gr.Textbox(placeholder="Hello! How are you?", label="Message"), "state"], | |
outputs=[chatbot, "state"], | |
allow_flagging="never", | |
) | |
iface.queue().launch() | |