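"""Evaluate checkpoints of the distributed GPT-2 models.

For each model repo, every fifth tagged revision (tags are epoch numbers) is
loaded, run over a random contiguous slice of dataset rows, and the resulting
average loss is written to results.json, keyed by model name and epoch.
"""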
import json
import os
import random

import torch
from distributed_training.data.dataset import DataLoader
from huggingface_hub import list_repo_refs
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"
test_indices_length = 1000

models = [
    "distributed/optimized-gpt2-250m",
    "distributed/optimized-gpt2-250m-v0.1.1",
    "distributed/gpt2-94m",
]

# Resume from any existing results so already-evaluated checkpoints are skipped
if os.path.exists("results.json"):
    with open("results.json", "r") as file:
        results = json.load(file)
else:
    results = {}

for model_name in models:

    if model_name not in results:
        results[model_name] = {}

    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    # Checkpoints are published as repo tags named after their epoch number
    refs = list_repo_refs(model_name, repo_type="model")
    global_epoch = max(int(tag.name) for tag in refs.tags) if refs.tags else None

    # Skip models that have no tagged checkpoints yet
    if global_epoch is None:
        continue

    # Evaluate every fifth tagged checkpoint
    for epoch in range(0, global_epoch, 5):

        if str(epoch) in results[model_name]:
            continue

        model = AutoModelForCausalLM.from_pretrained(
            model_name, revision=str(epoch), trust_remote_code=True
        )
        model = model.to(device)
        model.eval()

        # Sample a random contiguous window of rows to evaluate on
        search_start = random.randrange(DataLoader.max_pages - test_indices_length + 1)
        group = list(range(search_start, search_start + test_indices_length))

        dataloader = DataLoader(
            batch_size=1,
            sequence_length=1024,
            rows=group,
        )

        total_loss = 0
        index = 0
        # Evaluate one pass over the sampled rows; no gradients are needed
        with torch.no_grad():
            for index, batch in enumerate(dataloader):
                inputs = batch[0].to(device)
                labels = batch[1].to(device)

                # Inputs and labels are expected to have the same length
                assert len(inputs[0]) == len(labels[0]), "input/label length mismatch"

                # The custom "optimized" models return the loss at index 1 of the
                # output tuple; the others follow the standard transformers API
                if "optimized" in model_name:
                    outputs = model(input_ids=inputs, labels=labels)
                    loss = outputs[1]
                else:
                    outputs = model(input_ids=inputs, labels=inputs)
                    loss = outputs.loss

                # Accumulate total loss
                total_loss += loss.item()

        average_loss = total_loss / (index + 1)
        results[model_name][str(epoch)] = [average_loss]
        print(f"Epoch: {epoch}  Average Loss: {average_loss:.2f}")

        # Write results after each checkpoint so interrupted runs can resume
        with open("results.json", "w") as outfile:
            json.dump(results, outfile, indent=4)