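"""Gradio demo for a quantized GPT text generator.

Loads a quantized checkpoint ("gpt_model_quantized.pt"), dequantizes it back
to float32 on the fly, and serves Shakespeare-style text continuation through
a simple Gradio interface.
"""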
import gradio as gr
import torch
import tiktoken
import numpy as np
from model import GPT, GPTConfig # Changed from train to model
def load_quantized_model():
    model = GPT(GPTConfig())
    # weights_only=False: the checkpoint stores plain dicts of numpy arrays,
    # which recent PyTorch versions reject under the weights_only default;
    # map_location keeps loading working on CPU-only hosts.
    quantized_dict = torch.load(
        "gpt_model_quantized.pt", map_location="cpu", weights_only=False
    )

    # Dequantize: quantized entries are stored as {'data': int array,
    # 'scale': float}; everything else is passed through unchanged.
    state_dict = {}
    for key, value in quantized_dict.items():
        if isinstance(value, dict):
            state_dict[key] = torch.tensor(
                value['data'].astype(np.float32) * value['scale']
            )
        else:
            state_dict[key] = value

    model.load_state_dict(state_dict)
    model.eval()
    return model
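
# For reference, a minimal sketch of how a checkpoint in this format could be
# produced (hypothetical helper; the actual quantization script is not part of
# this file). Each float tensor is stored as int8 data plus a per-tensor scale,
# matching the {'data': ..., 'scale': ...} layout dequantized above.
def quantize_state_dict(state_dict):
    quantized = {}
    for key, value in state_dict.items():
        if value.dtype == torch.float32:
            arr = value.cpu().numpy()
            # Symmetric per-tensor scale; fall back to 1.0 for all-zero tensors.
            scale = float(np.abs(arr).max() / 127) or 1.0
            quantized[key] = {
                'data': np.round(arr / scale).astype(np.int8),
                'scale': scale,
            }
        else:
            quantized[key] = value  # leave non-float entries untouched
    return quantized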
def generate_text(input_text):
    try:
        # Set device
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Load model
        model = load_quantized_model()
        model = model.to(device)

        # Tokenize input
        tokenizer = tiktoken.get_encoding('gpt2')
        input_tokens = torch.tensor([tokenizer.encode(input_text)]).to(device)

        # Generate
        with torch.no_grad():
            output_tokens = model.generate(input_tokens, max_new_tokens=10)[0].tolist()

        # Decode and return
        generated_text = tokenizer.decode(output_tokens)
        return generated_text
    except Exception as e:
        return f"Error generating text: {e}"
# Create Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Textbox(lines=10, label="Generated Text"),
    title="Text Generator",
    description="Enter some text and the model will generate a Shakespeare-style continuation.",
    examples=[
        ["To be, or not to be,"],
        ["All the world's a stage, and all the men"],
        ["But soft, what light through yonder"],
        ["Friends, Romans, countrymen,"],
        ["Now is the winter of our discontent"]
    ]
)
# Launch the interface
iface.launch()