Neu256 committed on
Commit 61ea8cf · verified · 1 Parent(s): a2d8cae

Update utils.py

Files changed (1)
  utils.py  +1 -56
utils.py CHANGED
@@ -3,18 +3,12 @@ import torch
 from datetime import datetime
 
 # hyperparameters
-BATCH_SIZE = 64 # how many independent sequences will we process in parallel?
-BLOCK_SIZE = 128 # what is the maximum context length for predictions?
-MAX_ITER = 2 # number of training iterations
-EVAL_INTER = 1
-LEARNING_RATE = 1e-5
-EPS = 1e-5
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_HEAD = 6
 NUM_EMBED = NUM_HEAD * 128
 NUM_LAYER = 8
 DROPOUT = 0.3
-MAX_SEQ_LEN = 2048
+MAX_SEQ_LEN = 4096
 
 def encode(text_seq: str, tokenizer: any) -> torch.Tensor:
     """
@@ -37,52 +31,3 @@ def decode(enc_sec: torch.Tensor, tokenizer: any) -> str:
     # decode the indices to a string
     text = tokenizer.decode(enc_sec)
     return text
-
-
-def get_batch(data: list[str], block_size: int, batch_size: int):
-    """
-    A simple function to create batches of data.
-    GPUs allow for parallel processing, so we can feed multiple
-    chunks at once; the batch size determines how many independent
-    sequences we process in parallel.
-
-    Parameters:
-        data: list[str]: data to take the batch from
-        block_size (int): size of the text that is processed at once
-        batch_size (int): number of sequences to process in parallel
-
-    Returns:
-        x, y: a tuple with the token sequences and the token targets
-    """
-
-    ix = torch.randint(len(data) - block_size, (batch_size,))
-    # we stack batch_size rows of sentences,
-    # so x and y are matrices with rows_num=batch_size
-    # and col_num=block_size
-    x = torch.stack([data[i : i + block_size] for i in ix])
-    # y is x shifted one position right - because we predict
-    # the word in y having all the previous words as context
-    y = torch.stack([data[i + 1 : i + block_size + 1] for i in ix])
-    x, y = x.to(DEVICE), y.to(DEVICE)
-    return x, y
-
-
-@torch.no_grad()
-def estimate_loss(
-    val_loader,
-    model: torch.nn.Module,
-    eval_iters: int = 10
-):
-    out = {}
-    model.eval()
-    losses = torch.zeros(eval_iters)
-    k = 0
-    for x, y in val_loader:
-        if k >= eval_iters:
-            break
-        logits, loss = model.forward(x, y)
-        losses[k] = loss.item()
-        k += 1
-    out = losses.mean()
-    model.train()
-    return out
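The single added line doubles MAX_SEQ_LEN from 2048 to 4096. This constant typically caps how long a context the model can attend over; below is a minimal sketch of the usual pattern, assuming MAX_SEQ_LEN sizes a learned positional-embedding table (the model code is not part of this diff, so the class and names here are hypothetical):

import torch
import torch.nn as nn

NUM_EMBED = 6 * 128   # NUM_HEAD * 128, as defined in utils.py
MAX_SEQ_LEN = 4096    # value after this commit (was 2048)

class PositionTable(nn.Module):
    # hypothetical consumer of MAX_SEQ_LEN: one learned vector per position;
    # doubling MAX_SEQ_LEN doubles this table, and inputs longer than
    # max_seq_len must be truncated before lookup
    def __init__(self, max_seq_len: int = MAX_SEQ_LEN, num_embed: int = NUM_EMBED):
        super().__init__()
        self.pos_emb = nn.Embedding(max_seq_len, num_embed)

    def forward(self, idx: torch.Tensor) -> torch.Tensor:
        # idx: (batch, seq_len) token ids with seq_len <= max_seq_len
        positions = torch.arange(idx.shape[1], device=idx.device)
        return self.pos_emb(positions)  # (seq_len, num_embed), broadcasts over batch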
 
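The 56 deleted lines drop get_batch and estimate_loss from utils.py, so training scripts that imported them now have to supply their own batching and evaluation. A minimal drop-in sketch, assuming data is a 1-D tensor of token ids (the deleted code's list[str] annotation never matched its tensor slicing) and that the model returns a (logits, loss) pair, as the deleted code expected:

import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def get_batch(data: torch.Tensor, block_size: int, batch_size: int):
    # sample batch_size random windows of block_size tokens;
    # y is x shifted one position right, i.e. the next-token targets
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i : i + block_size] for i in ix])
    y = torch.stack([data[i + 1 : i + block_size + 1] for i in ix])
    return x.to(DEVICE), y.to(DEVICE)

@torch.no_grad()
def estimate_loss(data: torch.Tensor, model: torch.nn.Module,
                  block_size: int, batch_size: int, eval_iters: int = 10):
    # mean loss over eval_iters random batches; restores train mode when done
    model.eval()
    losses = torch.zeros(eval_iters)
    for k in range(eval_iters):
        x, y = get_batch(data, block_size, batch_size)
        _, loss = model(x, y)  # assumes the model returns (logits, loss)
        losses[k] = loss.item()
    model.train()
    return losses.mean()

Called as get_batch(train_data, block_size=128, batch_size=64), this reproduces the defaults the commit deleted (BLOCK_SIZE = 128, BATCH_SIZE = 64).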