Spaces:

distributed
/

model_convergence

Running

App Files Files Community

kmfoda committed on Sep 16, 2024

Commit

2467ab2

1 Parent(s): 34e3f6e

Add app.py

Browse files

Files changed (3) hide show

app.py +33 -0
evaluate.py +94 -0
results.json +318 -0

app.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import gradio as gr
+import json
+import pandas as pd
+with open('results.json', 'r') as file:
+    results = json.load(file)
+models = [key for key in results.keys()]
+demo = gr.Blocks()
+df = pd.DataFrame.from_dict(results[models[0]], orient = "index").reset_index()
+df.columns = ["Step", "Loss"]
+df["Step"] = pd.to_numeric(df["Step"])
+def return_results(model_name):
+    print(model_name)
+    df = pd.DataFrame.from_dict(results[model_name], orient = "index").reset_index()
+    df.columns = ["Step", "Loss"]
+    df["Step"] = pd.to_numeric(df["Step"])
+    return df
+with demo:
+    with gr.Row():
+        title = gr.Markdown(value=f"""# <p style="text-align: center;"> Subnet 38 Model Convergence</p>""")
+    with gr.Row():
+        dropdown_1 = gr.Dropdown(choices = models, value = models[0])
+        button_1 = gr.Button("Submit")
+    with gr.Row():
+        chart = gr.LinePlot(df, "Step", "Loss")
+    button_1.click(return_results, dropdown_1, chart)
+demo.launch(debug=True, server_name="0.0.0.0", server_port=7860)

evaluate.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from distributed_training.data.dataset import DataLoader
+import random
+from huggingface_hub import list_repo_refs
+import matplotlib.pyplot as plt
+import json
+device = "cuda"
+test_indices_length = 10
+models = ["distributed/optimized-gpt2-250m", "distributed/gpt2-250m"]
+with open('./results.json', 'r') as file:
+    results = json.load(file)
+for model_name in models:
+    if (model_name not in results.keys()) or (model_name == "distributed/optimized-gpt2-250m"):
+        results[model_name] = {}
+    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+    refs = list_repo_refs(model_name, repo_type="model")
+    global_epoch = max([int(tag.name) for tag in refs.tags]) if refs.tags else None
+    for epoch in range(0, global_epoch, 5):
+    # for epoch in [global_epoch]:
+        if str(epoch) in results[model_name].keys():
+            continue
+        model = AutoModelForCausalLM.from_pretrained(model_name, revision=str(epoch), trust_remote_code=True)
+        model = model.to(device)
+        search_start = random.choice(
+            range(
+                DataLoader.max_pages
+                - test_indices_length
+                + 1
+            )
+        )
+        group = [
+            i
+            for i in range(
+                search_start, search_start + test_indices_length
+            )
+        ]
+        dataloader = DataLoader(
+            batch_size=1,
+            sequence_length=1024,
+            rows=group,
+        )
+        total_loss = 0
+        index = 0
+        # Train data for one epoch
+        for index, batch in enumerate(dataloader):
+            inputs = batch[0].to(device)
+            labels = batch[1].to(device)
+            if (len(inputs[0]) != len(labels[0])):
+                breakpoint()
+            if "optimized" in model_name:
+                outputs = model(input_ids=inputs, labels=labels)
+                loss = outputs[1]
+            else:
+                outputs = model(input_ids=inputs, labels=inputs)
+                loss = outputs.loss
+            # Accumulate Total Loss
+            total_loss += loss.detach().item()
+            # Backward Pass
+            model.zero_grad()
+        average_loss = total_loss / (index+1)
+        results[model_name][str(epoch)] = [average_loss]
+        print(f"Epoch: {epoch}  Average Loss: {average_loss:.2f}")
+    # breakpoint()
+    with open("./results.json", "w") as outfile:
+        json.dump(results, outfile, indent = 4)
+for model_name in models:
+    plt.plot(results[model_name].keys(), results[model_name].values())
+    plt.title(f"{model_name} Convergence Over Time")
+    plt.xlabel("Steps")
+    plt.ylabel("Loss")
+    plt.xticks(fontsize=3.5)
+    plt.savefig(f"{model_name.split('/')[1]}_results.png")

results.json ADDED Viewed

	@@ -0,0 +1,318 @@

+{
+    "distributed/optimized-gpt2-250m": {
+        "0": [
+            11.042416954040528
+        ],
+        "5": [
+            9.064676761627197
+        ],
+        "10": [
+            8.353436279296876
+        ],
+        "15": [
+            8.157295894622802
+        ],
+        "20": [
+            7.744552771250407
+        ],
+        "25": [
+            7.923193550109863
+        ],
+        "30": [
+            7.360100865364075
+        ],
+        "35": [
+            7.582625230153401
+        ],
+        "40": [
+            7.635447263717651
+        ],
+        "45": [
+            7.298124694824219
+        ],
+        "50": [
+            7.584524154663086
+        ],
+        "55": [
+            7.3763152122497555
+        ],
+        "60": [
+            7.288678407669067
+        ],
+        "65": [
+            7.490873456001282
+        ],
+        "70": [
+            6.960979843139649
+        ],
+        "75": [
+            7.144528865814209
+        ],
+        "80": [
+            7.195922565460205
+        ],
+        "85": [
+            7.632096767425537
+        ],
+        "90": [
+            7.1985063552856445
+        ],
+        "95": [
+            6.93459119796753
+        ],
+        "100": [
+            6.701247930526733
+        ],
+        "105": [
+            7.049336791038513
+        ],
+        "110": [
+            6.837615370750427
+        ],
+        "115": [
+            7.020212531089783
+        ],
+        "120": [
+            6.697751712799072
+        ],
+        "125": [
+            6.588788318634033
+        ],
+        "130": [
+            6.7763800621032715
+        ],
+        "135": [
+            6.9689741134643555
+        ],
+        "140": [
+            6.709237098693848
+        ],
+        "145": [
+            7.035352826118469
+        ],
+        "150": [
+            6.6759562492370605
+        ],
+        "155": [
+            6.7904438972473145
+        ],
+        "160": [
+            6.934930443763733
+        ],
+        "165": [
+            6.596151669820149
+        ],
+        "170": [
+            6.548283481597901
+        ],
+        "175": [
+            6.447548770904541
+        ],
+        "180": [
+            6.536311149597168
+        ],
+        "185": [
+            6.70653502146403
+        ],
+        "190": [
+            6.557690461476644
+        ],
+        "195": [
+            6.67773175239563
+        ],
+        "200": [
+            6.467767238616943
+        ],
+        "205": [
+            6.4236222267150875
+        ],
+        "210": [
+            6.6386902809143065
+        ],
+        "215": [
+            6.141726970672607
+        ],
+        "220": [
+            6.378688907623291
+        ],
+        "225": [
+            6.42099928855896
+        ],
+        "230": [
+            6.738618612289429
+        ],
+        "235": [
+            6.558012008666992
+        ],
+        "240": [
+            6.777796030044556
+        ],
+        "245": [
+            6.396033000946045
+        ],
+        "250": [
+            6.102731609344483
+        ],
+        "255": [
+            6.540631294250488
+        ]
+    },
+    "distributed/gpt2-250m": {
+        "0": [
+            10.942681312561035
+        ],
+        "5": [
+            9.673693656921387
+        ],
+        "10": [
+            9.623630285263062
+        ],
+        "15": [
+            9.381710529327393
+        ],
+        "20": [
+            9.240305423736572
+        ],
+        "25": [
+            9.34835402170817
+        ],
+        "30": [
+            9.45114345550537
+        ],
+        "35": [
+            9.190510940551757
+        ],
+        "40": [
+            8.936849594116211
+        ],
+        "45": [
+            8.903728485107422
+        ],
+        "50": [
+            8.871788597106933
+        ],
+        "55": [
+            8.653409957885742
+        ],
+        "60": [
+            8.565237998962402
+        ],
+        "65": [
+            8.616942405700684
+        ],
+        "70": [
+            8.725053310394287
+        ],
+        "75": [
+            8.058599853515625
+        ],
+        "80": [
+            8.40323429107666
+        ],
+        "85": [
+            8.251930522918702
+        ],
+        "90": [
+            8.315114784240723
+        ],
+        "95": [
+            8.024084663391113
+        ],
+        "100": [
+            8.095765829086304
+        ],
+        "105": [
+            8.223698139190674
+        ],
+        "110": [
+            7.960695743560791
+        ],
+        "115": [
+            7.827797985076904
+        ],
+        "120": [
+            8.389174143473307
+        ],
+        "125": [
+            7.795609354972839
+        ],
+        "130": [
+            8.024239349365235
+        ],
+        "135": [
+            7.622925678888957
+        ],
+        "140": [
+            7.671920299530029
+        ],
+        "145": [
+            7.719462108612061
+        ],
+        "150": [
+            7.654707551002502
+        ],
+        "155": [
+            7.858335399627686
+        ],
+        "160": [
+            7.582762241363525
+        ],
+        "165": [
+            7.7280534108479815
+        ],
+        "170": [
+            7.398298358917236
+        ],
+        "175": [
+            7.448758959770203
+        ],
+        "180": [
+            7.248022079467773
+        ],
+        "185": [
+            7.408734480539958
+        ],
+        "190": [
+            7.431381821632385
+        ],
+        "195": [
+            7.13822078704834
+        ],
+        "200": [
+            7.499457120895386
+        ],
+        "205": [
+            7.281359386444092
+        ],
+        "210": [
+            7.49737777709961
+        ],
+        "215": [
+            7.441878795623779
+        ],
+        "220": [
+            7.27855650583903
+        ],
+        "225": [
+            7.162156343460083
+        ],
+        "230": [
+            7.732161164283752
+        ],
+        "235": [
+            6.726261933644612
+        ],
+        "240": [
+            6.9339855194091795
+        ],
+        "245": [
+            7.31608259677887
+        ],
+        "250": [
+            7.316546440124512
+        ],
+        "255": [
+            7.263134765625
+        ]
+    }
+}