import torch
from transformers import AutoTokenizer
from fastapi import FastAPI
from model import SmolLM2, SmolLM2Config
import gradio as gr

# Initialize model and tokenizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
model = SmolLM2(SmolLM2Config())

# Load trained weights
checkpoint = torch.load('checkpoint_step_5000.pt', map_location=device)  # Adjust path as needed
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()
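
# The checkpoint is assumed to be a dict written during training with the
# weights stored under 'model_state_dict', e.g.:
#   torch.save({'model_state_dict': model.state_dict(), 'step': step},
#              f'checkpoint_step_{step}.pt')
# If your checkpoint holds the raw state dict instead, load it directly with
# model.load_state_dict(checkpoint).
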
def generate_text(prompt, max_length=100, temperature=0.7, top_k=50):
    """Generate text from a prompt."""
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # Generate without tracking gradients; assumes the custom
    # SmolLM2.generate accepts these sampling arguments
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_length,
            temperature=temperature,
            top_k=top_k
        )

    # Decode and return the generated text
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text
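
# Quick sanity check, assuming the checkpoint above loaded (hypothetical prompt):
#   >>> generate_text("Once upon a time", max_length=50)
#   'Once upon a time ...'
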
# Gradio interface
def gradio_interface(prompt, max_length, temperature, top_k):
    return generate_text(prompt, int(max_length), float(temperature), int(top_k))

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here..."),
        gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top K"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="SmolLM2 Text Generation",
    description="Generate text using the SmolLM2 model",
)
# For Hugging Face deployment: mount the Gradio interface on a FastAPI app
# so it can be served by an ASGI server
app = FastAPI()
app = gr.mount_gradio_app(app, iface, path="/")

if __name__ == "__main__":
    iface.launch()