import torch
from transformers import AutoTokenizer
from model import SmolLM2, SmolLM2Config
import gradio as gr
# Initialize model and tokenizer
device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
model = SmolLM2(SmolLM2Config())
# Load trained weights
checkpoint = torch.load('checkpoint_step_5000.pt', map_location=device) # Adjust path as needed
model.load_state_dict(checkpoint['model_state_dict'])
model.to(device)
model.eval()
def generate_text(prompt, max_length=100, temperature=0.7, top_k=50):
    """Generate text from a prompt"""
    # Tokenize the prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt').to(device)

    # Generate
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_length,
            temperature=temperature,
            top_k=top_k
        )

    # Decode and return the generated text
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return generated_text
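
# Quick local sanity check (hypothetical prompt; assumes the checkpoint above
# loaded successfully):
#   print(generate_text("Once upon a time", max_length=50))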
# Gradio interface
def gradio_interface(prompt, max_length, temperature, top_k):
    return generate_text(prompt, int(max_length), float(temperature), int(top_k))
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here..."),
        gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top K"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="SmolLM2 Text Generation",
    description="Generate text using the SmolLM2 model"
)
# For Hugging Face deployment: Spaces executes this script directly, so
# launching the interface below is all that is needed. (The previous
# `app = gr.mount_gradio_app(app, iface)` referenced an undefined `app`
# and omitted the required `path` argument, so it would raise a NameError.)
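
# If you instead want to serve through your own FastAPI app, a minimal sketch
# (assuming Gradio's gr.mount_gradio_app and a plain FastAPI instance; run
# with `uvicorn app:app`) would look like:
#
#   from fastapi import FastAPI
#   app = FastAPI()
#   app = gr.mount_gradio_app(app, iface, path="/")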
if __name__ == "__main__":
    iface.launch()