import torch
import gradio as gr
from transformers import AutoTokenizer

from model import SmolLM2, SmolLM2Config

# Initialize model and tokenizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
model = SmolLM2(SmolLM2Config())

# Load trained weights
checkpoint = torch.load('checkpoint_step_5000.pt', map_location=device)  # Adjust path as needed
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()


def generate_text(prompt, max_length=100, temperature=0.7, top_k=50):
    """Generate text from a prompt."""
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # Generate without tracking gradients
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_length,
            temperature=temperature,
            top_k=top_k
        )

    # Decode and return the generated text
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text


# Gradio interface: cast slider values to the types generate_text expects
def gradio_interface(prompt, max_length, temperature, top_k):
    return generate_text(prompt, int(max_length), float(temperature), int(top_k))


iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here..."),
        gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top K"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="SmolLM2 Text Generation",
    description="Generate text using the SmolLM2 model"
)

# For Hugging Face Spaces deployment, launching the interface directly is
# sufficient; mounting onto a FastAPI app via gr.mount_gradio_app is only
# needed when serving Gradio alongside other routes.
if __name__ == "__main__":
    iface.launch()
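
# ---------------------------------------------------------------------------
# Reference sketch (not called by the app): the model.generate call above
# assumes SmolLM2 in model.py implements autoregressive decoding with
# temperature scaling and top-k sampling. If it does not, a minimal loop with
# the assumed semantics looks like the one below. The name sample_tokens and
# the assumption that calling the model returns logits of shape
# [batch, seq_len, vocab_size] are hypothetical, not part of the original code.
import torch.nn.functional as F


@torch.no_grad()
def sample_tokens(model, input_ids, max_new_tokens=100, temperature=0.7, top_k=50):
    """Illustrative sampling loop: scale logits, mask to top-k, sample, append."""
    for _ in range(max_new_tokens):
        logits = model(input_ids)                 # assumed [batch, seq, vocab]
        logits = logits[:, -1, :] / temperature   # last-position logits, scaled
        # Mask everything below the k-th largest logit before sampling
        topk_vals, _ = torch.topk(logits, top_k)
        logits[logits < topk_vals[:, [-1]]] = float('-inf')
        probs = F.softmax(logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        input_ids = torch.cat([input_ids, next_token], dim=1)
    return input_ids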