thomwolf HF staff committed on
Commit
6a71dad
·
1 Parent(s): 878bd55
dist/index.html CHANGED
@@ -157,9 +157,10 @@
157
  <div class="row">
158
  <label for="zero">Zero:</label>
159
  <select id="zero" name="zero">
160
- <option value="Optimizer">Optimizer</option>
161
- <option value="Gradients">Gradients</option>
162
- <option value="Parameters">Parameters</option>
 
163
  </select>
164
  <span></span> <!-- Empty span to maintain grid alignment -->
165
  </div>
 
157
  <div class="row">
158
  <label for="zero">Zero:</label>
159
  <select id="zero" name="zero">
160
+ <option value="0">0</option>
161
+ <option value="1">1</option>
162
+ <option value="2">2</option>
163
+ <option value="3">3</option>
164
  </select>
165
  <span></span> <!-- Empty span to maintain grid alignment -->
166
  </div>
dist/main.bundle.js CHANGED
The diff for this file is too large to render. See raw diff
 
dist/main.bundle.js.map CHANGED
The diff for this file is too large to render. See raw diff
 
src/index.html CHANGED
@@ -157,9 +157,10 @@
157
  <div class="row">
158
  <label for="zero">Zero:</label>
159
  <select id="zero" name="zero">
160
- <option value="Optimizer">Optimizer</option>
161
- <option value="Gradients">Gradients</option>
162
- <option value="Parameters">Parameters</option>
 
163
  </select>
164
  <span></span> <!-- Empty span to maintain grid alignment -->
165
  </div>
 
157
  <div class="row">
158
  <label for="zero">Zero:</label>
159
  <select id="zero" name="zero">
160
+ <option value="0">0</option>
161
+ <option value="1">1</option>
162
+ <option value="2">2</option>
163
+ <option value="3">3</option>
164
  </select>
165
  <span></span> <!-- Empty span to maintain grid alignment -->
166
  </div>
src/memory.js CHANGED
@@ -89,14 +89,14 @@ export function activationMemory(
89
  return data;
90
  }
91
 
92
- export function paramGradsOpt(h, L, s, v, k = 8, dp = 1, zero = "Optimizer", mixed = true) {
93
  // h, # hidden dimension size
94
  // L, # number of layers
95
  // s, # sequence length
96
  // v, # vocab size
97
  // k=8, # parameters for optimizer (Adam: 8 = 4 bytes moments + 4 bytes variance)
98
  // dp=1, # data parallelism
99
- // zero = "Optimizer", # zero data parallelism
100
  // mixed=True # mixed precision training
101
  console.log('paramGradsOpt called with:', { h, L, s, v, k, dp, zero, mixed });
102
  const emb = h * (v + s);
@@ -113,9 +113,9 @@ export function paramGradsOpt(h, L, s, v, k = 8, dp = 1, zero = "Optimizer", mix
113
  const data = {
114
  name: "ParametersGradientOps",
115
  children: [
116
- { name: 'Parameters', value: zero === "Parameters" ? bytesPerParameter * n / dp : bytesPerParameter * n },
117
- { name: 'Gradients', value: zero === "Gradients" ? bytesPerParameter * n / dp : bytesPerParameter * n },
118
- { name: 'OptimizerAverages', value: zero === "Optimizer" ? k * n / dp : k * n }
119
  ]
120
  };
121
  console.log('paramGradsOpt result:', data);
@@ -319,7 +319,7 @@ function setPresetValues(preset) {
319
  }
320
  });
321
 
322
- updateGraph();
323
  }
324
 
325
  function syncSliderAndInput(sliderId, inputId) {
@@ -371,6 +371,13 @@ export const init_memory_plot = function () {
371
  console.warn('FF Activation select not found');
372
  }
373
 
 
 
 
 
 
 
 
374
  const mixedCheckbox = document.getElementById('mixed');
375
  if (mixedCheckbox) {
376
  mixedCheckbox.addEventListener('change', updateGraph);
 
89
  return data;
90
  }
91
 
92
+ export function paramGradsOpt(h, L, s, v, k = 8, dp = 1, zero = 0, mixed = true) {
93
  // h, # hidden dimension size
94
  // L, # number of layers
95
  // s, # sequence length
96
  // v, # vocab size
97
  // k=8, # parameters for optimizer (Adam: 8 = 4 bytes moments + 4 bytes variance)
98
  // dp=1, # data parallelism
99
+ // zero = 0, 1, 2, 3, # zero data parallelism
100
  // mixed=True # mixed precision training
101
  console.log('paramGradsOpt called with:', { h, L, s, v, k, dp, zero, mixed });
102
  const emb = h * (v + s);
 
113
  const data = {
114
  name: "ParametersGradientOps",
115
  children: [
116
+ { name: 'Parameters', value: zero >= 3 ? bytesPerParameter * n / dp : bytesPerParameter * n },
117
+ { name: 'Gradients', value: zero >= 2 ? bytesPerParameter * n / dp : bytesPerParameter * n },
118
+ { name: 'OptimizerAverages', value: zero >= 1 ? k * n / dp : k * n }
119
  ]
120
  };
121
  console.log('paramGradsOpt result:', data);
 
319
  }
320
  });
321
 
322
+ updateGraph(); // Add this line to ensure the graph updates when a preset is selected
323
  }
324
 
325
  function syncSliderAndInput(sliderId, inputId) {
 
371
  console.warn('FF Activation select not found');
372
  }
373
 
374
+ const zeroSelect = document.getElementById('zero');
375
+ if (zeroSelect) {
376
+ zeroSelect.addEventListener('change', updateGraph);
377
+ } else {
378
+ console.warn('Zero select not found');
379
+ }
380
+
381
  const mixedCheckbox = document.getElementById('mixed');
382
  if (mixedCheckbox) {
383
  mixedCheckbox.addEventListener('change', updateGraph);