SeemG committed on
Commit
117a6f0
·
verified ·
1 Parent(s): dba5c21

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +48 -0
model.py CHANGED
@@ -10,6 +10,54 @@ import torch
10
  import torch.nn as nn
11
  from torch.nn import functional as F
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  class LayerNorm(nn.Module):
14
  """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
15
 
 
10
  import torch.nn as nn
11
  from torch.nn import functional as F
12
 
13
+
14
+
15
+ # hyperparameters
16
+ batch_size = 16 # how many independent sequences will we process in parallel?
17
+ block_size = 32 # what is the maximum context length for predictions?
18
+ max_iters = 5000
19
+ eval_interval = 100
20
+ learning_rate = 1e-3
21
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
22
+ eval_iters = 200
23
+ n_embd = 64
24
+ n_head = 4
25
+ n_layer = 4
26
+ dropout = 0.0
27
+ # ------------
28
+
29
+ torch.manual_seed(1337)
30
+
31
+ # wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
32
+ with open('input.txt', 'r', encoding='utf-8') as f:
33
+ text = f.read()
34
+
35
+ # here are all the unique characters that occur in this text
36
+ chars = sorted(list(set(text)))
37
+ vocab_size = len(chars)
38
+ # create a mapping from characters to integers
39
+ stoi = { ch:i for i,ch in enumerate(chars) }
40
+ itos = { i:ch for i,ch in enumerate(chars) }
41
+ encode = lambda s: [stoi[c] for c in s] # encoder: take a string, output a list of integers
42
+ decode = lambda l: ''.join([itos[i] for i in l]) # decoder: take a list of integers, output a string
43
+
44
+ # Train and test splits
45
+ data = torch.tensor(encode(text), dtype=torch.long)
46
+ n = int(0.9*len(data)) # first 90% will be train, rest val
47
+ train_data = data[:n]
48
+ val_data = data[n:]
49
+
50
+ # data loading
51
+ def get_batch(split):
52
+ # generate a small batch of data of inputs x and targets y
53
+ data = train_data if split == 'train' else val_data
54
+ ix = torch.randint(len(data) - block_size, (batch_size,))
55
+ x = torch.stack([data[i:i+block_size] for i in ix])
56
+ y = torch.stack([data[i+1:i+block_size+1] for i in ix])
57
+ x, y = x.to(device), y.to(device)
58
+ return x, y
59
+
60
+
61
  class LayerNorm(nn.Module):
62
  """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
63