import gradio as gr
import torch
import tiktoken
import numpy as np
from model import GPT, GPTConfig  # Changed from train to model: model.py defines the GPT architecture


def load_quantized_model():
    """Rebuild the model from the int8-quantized checkpoint."""
    model = GPT(GPTConfig())
    # weights_only=False: the checkpoint stores plain numpy arrays and Python
    # dicts, which the newer weights_only=True default refuses to unpickle
    quantized_dict = torch.load("gpt_model_quantized.pt", weights_only=False)

    # Dequantize: each quantized entry is a dict holding an int8 'data' array
    # and a float 'scale'; everything else is stored as-is
    state_dict = {}
    for key, value in quantized_dict.items():
        if isinstance(value, dict):
            state_dict[key] = torch.from_numpy(
                value['data'].astype(np.float32) * value['scale']
            )
        else:
            state_dict[key] = value

    model.load_state_dict(state_dict)
    model.eval()
    return model


# Load the model and tokenizer once at startup instead of on every request
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = load_quantized_model().to(device)
tokenizer = tiktoken.get_encoding('gpt2')


def generate_text(input_text):
    try:
        # Tokenize input and add a batch dimension
        input_tokens = torch.tensor([tokenizer.encode(input_text)]).to(device)

        # Generate a continuation
        with torch.no_grad():
            output_tokens = model.generate(input_tokens, max_new_tokens=10)[0].tolist()

        # Decode and return
        return tokenizer.decode(output_tokens)
    except Exception as e:
        return f"Error generating text: {e}"


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=5, label="Input Text"),
    outputs=gr.Textbox(lines=10, label="Generated Text"),
    title="Text Generator",
    description="Enter some text and the model will generate a Shakespeare-style continuation.",
    examples=[
        ["To be, or not to be,"],
        ["All the world's a stage, and all the men"],
        ["But soft, what light through yonder"],
        ["Friends, Romans, countrymen,"],
        ["Now is the winter of our discontent"],
    ],
)

# Launch the interface
iface.launch()
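
# The dequantization in load_quantized_model() implies that
# "gpt_model_quantized.pt" stores each quantized weight as
# {'data': int8 numpy array, 'scale': float}. For reference, below is a
# minimal sketch of a per-tensor int8 absmax quantizer that would produce
# that format. This is an assumption: the original quantization script is
# not shown here, and "gpt_model.pt" is a hypothetical source checkpoint.
# The function is never called by this app.
def quantize_checkpoint(src="gpt_model.pt", dst="gpt_model_quantized.pt"):
    state_dict = torch.load(src, map_location='cpu', weights_only=True)
    quantized = {}
    for key, value in state_dict.items():
        if value.is_floating_point() and value.ndim >= 2:
            # Absmax quantization: map [-max|w|, +max|w|] onto int8 [-127, 127]
            scale = max(value.abs().max().item() / 127.0, 1e-8)
            data = (value / scale).round().clamp(-127, 127).numpy().astype(np.int8)
            quantized[key] = {'data': data, 'scale': scale}
        else:
            # Keep small tensors (biases, layer norms) in full precision
            quantized[key] = value
    torch.save(quantized, dst)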