Spaces:
Sleeping
Sleeping
File size: 1,699 Bytes
b57fe5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import torch
from config.model_config import ModelConfig
from src.data.tokenizer import CharacterTokenizer
from src.model.gpt import GPTModel
from src.training.trainer import train
from src.utils.helpers import generate, setup_logging, prepare_data
def main():
    """Train the GPT model on the configured corpus and log sample generations.

    The pipeline, in order:

    1. set up logging and instantiate ``ModelConfig``;
    2. pick ``"cuda"`` when torch reports a GPU, else ``"cpu"``;
    3. read the text at ``config.data_path`` and build a
       ``CharacterTokenizer`` from it;
    4. call ``prepare_data`` and construct ``GPTModel`` plus an AdamW
       optimizer;
    5. run ``train`` with the hyper-parameters taken from the config;
    6. reload the checkpoint at ``config.checkpoint_path`` and log the
       output of ``generate`` for a fixed set of prompts.
    """
    logger = setup_logging()
    config = ModelConfig()

    # Prefer the GPU when torch can see one; otherwise stay on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    logger.info(f"Using device: {device}")

    # Decode explicitly instead of relying on the platform's locale default,
    # so the same file yields the same characters (and vocabulary) everywhere.
    # NOTE(review): assumes the corpus is UTF-8 encoded -- confirm.
    with open(config.data_path, encoding="utf-8") as f:
        text = f.read()
    tokenizer = CharacterTokenizer(text)

    # The return value is not used here.
    # NOTE(review): presumably prepare_data persists the encoded data that
    # train() later reads, since train() receives no dataset -- confirm.
    prepare_data(text, tokenizer)

    model = GPTModel(config, tokenizer.vocab_size)
    model = model.to(device)

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config.learning_rate,
        weight_decay=config.weight_decay,
    )

    train(
        model=model,
        optimizer=optimizer,
        max_iters=config.max_iters,
        eval_interval=config.eval_interval,
        eval_iters=config.eval_iters,
        block_size=config.block_size,
        batch_size=config.batch_size,
        device=device,
        checkpoint_path=config.checkpoint_path,
    )

    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # produced by a trusted run (here: the train() call above).
    checkpoint = torch.load(config.checkpoint_path, map_location=device)
    if isinstance(checkpoint, torch.nn.Module):
        # A whole pickled module was saved: use it directly.
        model = checkpoint
    else:
        # Otherwise treat the object as a state_dict and restore it into the
        # model built above, rather than handing a plain mapping to generate().
        # NOTE(review): the checkpoint layout is decided by train() -- confirm.
        model.load_state_dict(checkpoint)

    for prompt in ("hello", "my name is", "america is"):
        result = generate(model, tokenizer, prompt, max_tokens=200, device=device)
        logger.info(f"\nPrompt: {prompt}")
        logger.info(f"Generated: {result}")
        logger.info("=" * 40)
# Run the training/generation pipeline only when this file is executed as a
# script, so importing the module does not trigger it.
if __name__ == "__main__":
    main()
|