# SmolLM-2-135M / app.py
import os

import torch
import gradio as gr
from typing import Optional
from dataclasses import dataclass
from transformers import AutoTokenizer

from model import Transformer


@dataclass
class ModelArgs:
    # Architecture params
    dim: int = 576
    intermediate_dim: int = 1536
    n_layers: int = 30
    n_heads: int = 9
    n_kv_heads: Optional[int] = 3  # grouped-query attention: 3 KV heads shared across the 9 query heads
    vocab_size: int = 49152  # set to the tokenizer's vocabulary size
    norm_eps: float = 1.0e-05
    init_scale: float = 0.041666666666666664
    rope_theta: int = 10000
    dropout: float = 0.1
    # Training params
    seed: int = 42
    max_batch_size: int = 2
    max_seq_len: int = 2048
    steps: int = 5050
    breakpoint_step: int = 5000
    warmup_steps_frac: float = 0.5
    save_interval: int = 1000
    eval_interval: int = 500
    log_interval: int = 1
    grad_accum_steps: int = 8
    checkpoint_path: str = os.path.join(os.getcwd(), "checkpoints")
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    # Optimizer
    initial_lr: float = 5e-4
    adam_beta1: float = 0.9
    adam_beta2: float = 0.95
    adam_eps: float = 1.0e-08
    weight_decay: float = 0.01
    use_fused: bool = True
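
# For reference only: a minimal sketch of how the optimizer fields above would
# typically be consumed during training. This demo app never trains, and the
# call below is an assumption, not the training script's actual code:
#
#   optimizer = torch.optim.AdamW(
#       model.parameters(),
#       lr=config.initial_lr,
#       betas=(config.adam_beta1, config.adam_beta2),
#       eps=config.adam_eps,
#       weight_decay=config.weight_decay,
#       fused=config.use_fused,
#   )
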

# Initialize model and tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/cosmo2-tokenizer")
config = ModelArgs()
config.device = device
model = Transformer(config)
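
# Defensive guard (an assumption, not in the original app): if the tokenizer
# defines no pad token, fall back to EOS so padding in generate_text works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token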

# Load trained weights from a checkpoint file
def load_checkpoint(model, path, device):
    try:
        checkpoint = torch.load(path, map_location=device)
        # Strip the "_orig_mod." prefix added by torch.compile, and drop
        # KV-cache buffers, which are inference-time state rather than weights.
        state_dict = {
            k.replace("_orig_mod.", ""): v
            for k, v in checkpoint.items()
            if "cached_keys" not in k and "cached_values" not in k
        }
        model.load_state_dict(state_dict)
        return model
    except Exception as e:
        print(f"Error loading checkpoint: {e}")
        return None

model = load_checkpoint(model, "smollm2_HF.pth", device)
if model is None:
    raise RuntimeError("Failed to load checkpoint 'smollm2_HF.pth'")
model.to(device)
model.eval()
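
# Hypothetical sanity check (not part of the original app): the "135M" in the
# model name refers to the total parameter count, which can be verified with
#   sum(p.numel() for p in model.parameters())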


def generate_text(prompt,
                  min_length: int = 28,
                  max_length: int = 40,
                  temperature: float = 0.7,
                  top_k: int = 50,
                  top_p: float = 0.7):
    """Generate text from a prompt."""
    input_ids = tokenizer(prompt,
                          padding=True,
                          truncation=True,
                          max_length=config.max_seq_len,
                          return_tensors="pt")["input_ids"].to(device)
    with torch.no_grad():  # inference only; no gradients needed
        generated = model.generate(
            input_ids,
            max_length=max_length,
            min_length=min_length,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p
        )
    return tokenizer.decode(generated[0], skip_special_tokens=True)
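
# Example usage (assumes the checkpoint above loaded successfully; output
# quality reflects only ~5000 training steps):
#   generate_text("When the sun comes up", min_length=20, max_length=60,
#                 temperature=0.8, top_k=50, top_p=0.9)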


# Gradio interface: accept one value per input component, in order, and cast
# each to the type generate_text expects
def gradio_interface(prompt, min_length, max_length, temperature, top_k, top_p):
    return generate_text(prompt, int(min_length), int(max_length),
                         float(temperature), int(top_k), float(top_p))

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Enter your prompt here..."),
        gr.Slider(minimum=10, maximum=500, value=28, label="Min Length"),
        gr.Slider(minimum=10, maximum=500, value=40, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature"),
        gr.Slider(minimum=1, maximum=100, value=50, label="Top K"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, label="Top P")
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="SmolLM2-135M Text Generation",
    description="SmolLM2-135M trained on cosmopedia-v2 for just 5000 steps",
    examples=[
        # Each example supplies one value per input:
        # [prompt, min_length, max_length, temperature, top_k, top_p]
        ["I found the love", 28, 50, 0.7, 50, 0.7],
        ["When the sun comes up", 28, 40, 0.8, 40, 0.7],
        ["The slow marching of ", 28, 60, 0.9, 45, 0.7]
    ],
)

if __name__ == "__main__":
    iface.launch()