Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
datasets:
|
4 |
+
- amang1802/synthetic_data_qna_fulltext_conditioned_L3.3_70B_deduped
|
5 |
+
base_model:
|
6 |
+
- meta-llama/Llama-3.1-8B
|
7 |
+
---
|
8 |
+
|
9 |
+
- Trained with torchtune to test continued pretraining (CPT)
|
10 |
+
- Shows a good improvement in ground-truth accuracy when using a Q&A dataset instead of only wiki-like text: from 23% to 31%
|
11 |
+
|
12 |
+
---
|
13 |
+
|
14 |
+
Torchtune logs
|
15 |
+
|
16 |
+
```
|
17 |
+
Step 1 | loss:1.1550652980804443 lr:1e-05 tokens_per_second_per_gpu:7858.67578125 peak_memory_active:78.02358818054199 peak_memory_alloc:78.02358818054199 peak_memory_reserved:100.0546875
|
18 |
+
Step 2 | loss:1.074491024017334 lr:1e-05 tokens_per_second_per_gpu:9497.6376953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
19 |
+
Step 3 | loss:1.036757230758667 lr:1e-05 tokens_per_second_per_gpu:9443.421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
20 |
+
Step 4 | loss:1.0045448541641235 lr:1e-05 tokens_per_second_per_gpu:9472.8544921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
21 |
+
Step 5 | loss:0.9949342012405396 lr:1e-05 tokens_per_second_per_gpu:1754.5042724609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
22 |
+
Step 6 | loss:0.9762706756591797 lr:1e-05 tokens_per_second_per_gpu:9503.0185546875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
23 |
+
Step 7 | loss:0.9750452637672424 lr:1e-05 tokens_per_second_per_gpu:9493.833984375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
24 |
+
Step 8 | loss:0.9748913049697876 lr:1e-05 tokens_per_second_per_gpu:9464.8017578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
25 |
+
Step 9 | loss:0.9671335816383362 lr:1e-05 tokens_per_second_per_gpu:1682.2664794921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
26 |
+
Step 10 | loss:0.9558936953544617 lr:1e-05 tokens_per_second_per_gpu:9502.4892578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
27 |
+
Step 11 | loss:0.9583306312561035 lr:1e-05 tokens_per_second_per_gpu:9477.5849609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
28 |
+
Step 12 | loss:0.93567955493927 lr:1e-05 tokens_per_second_per_gpu:9432.6015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
29 |
+
Step 13 | loss:0.9410796761512756 lr:1e-05 tokens_per_second_per_gpu:1682.098388671875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
30 |
+
Step 14 | loss:0.9390289783477783 lr:1e-05 tokens_per_second_per_gpu:9453.6650390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
31 |
+
Step 15 | loss:0.926663875579834 lr:1e-05 tokens_per_second_per_gpu:9473.9384765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
32 |
+
Step 16 | loss:0.927723228931427 lr:1e-05 tokens_per_second_per_gpu:9470.412109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
33 |
+
Step 17 | loss:0.920567512512207 lr:1e-05 tokens_per_second_per_gpu:1708.1058349609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
34 |
+
Step 18 | loss:0.9150664210319519 lr:1e-05 tokens_per_second_per_gpu:9478.974609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
35 |
+
Step 19 | loss:0.924900472164154 lr:1e-05 tokens_per_second_per_gpu:9455.2490234375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
36 |
+
Step 20 | loss:0.9017823934555054 lr:1e-05 tokens_per_second_per_gpu:9454.6162109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
37 |
+
Step 21 | loss:0.9159845113754272 lr:1e-05 tokens_per_second_per_gpu:1662.1505126953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
38 |
+
Step 22 | loss:0.8888995051383972 lr:1e-05 tokens_per_second_per_gpu:9452.20703125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
39 |
+
Step 23 | loss:0.8926591873168945 lr:1e-05 tokens_per_second_per_gpu:9485.6240234375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
40 |
+
Step 24 | loss:0.886618971824646 lr:1e-05 tokens_per_second_per_gpu:9464.8369140625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
41 |
+
Step 25 | loss:0.892602801322937 lr:1e-05 tokens_per_second_per_gpu:1685.84033203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
42 |
+
Step 26 | loss:0.8850957751274109 lr:1e-05 tokens_per_second_per_gpu:9490.6328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
43 |
+
Step 27 | loss:0.8848649263381958 lr:1e-05 tokens_per_second_per_gpu:9457.08203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
44 |
+
Step 28 | loss:0.8875665068626404 lr:1e-05 tokens_per_second_per_gpu:9455.3544921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
45 |
+
Step 29 | loss:0.8806608319282532 lr:1e-05 tokens_per_second_per_gpu:1645.5791015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
46 |
+
Step 30 | loss:0.8724529147148132 lr:1e-05 tokens_per_second_per_gpu:9455.6884765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
47 |
+
Step 31 | loss:0.8731796741485596 lr:1e-05 tokens_per_second_per_gpu:9487.953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
48 |
+
Step 32 | loss:0.8599528074264526 lr:1e-05 tokens_per_second_per_gpu:9456.908203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
49 |
+
Step 33 | loss:0.8691402673721313 lr:1e-05 tokens_per_second_per_gpu:1703.8731689453125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
50 |
+
Step 34 | loss:0.8764164447784424 lr:1e-05 tokens_per_second_per_gpu:9494.8505859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
51 |
+
Step 35 | loss:0.8447176814079285 lr:1e-05 tokens_per_second_per_gpu:9463.6650390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
52 |
+
Step 36 | loss:0.8470439314842224 lr:1e-05 tokens_per_second_per_gpu:9428.8984375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
53 |
+
Step 37 | loss:0.8530339002609253 lr:1e-05 tokens_per_second_per_gpu:1679.0570068359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
54 |
+
Step 38 | loss:0.8404569625854492 lr:1e-05 tokens_per_second_per_gpu:9484.4853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
55 |
+
Step 39 | loss:0.838159441947937 lr:1e-05 tokens_per_second_per_gpu:9439.072265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
56 |
+
Step 40 | loss:0.8427407741546631 lr:1e-05 tokens_per_second_per_gpu:9468.4130859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
57 |
+
Step 41 | loss:0.8266391754150391 lr:1e-05 tokens_per_second_per_gpu:1667.6983642578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
58 |
+
Step 42 | loss:0.8309320211410522 lr:1e-05 tokens_per_second_per_gpu:9479.7646484375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
59 |
+
Step 43 | loss:0.8297097086906433 lr:1e-05 tokens_per_second_per_gpu:9458.6044921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
60 |
+
Step 44 | loss:0.8245887756347656 lr:1e-05 tokens_per_second_per_gpu:9457.189453125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
61 |
+
Step 45 | loss:0.805065929889679 lr:1e-05 tokens_per_second_per_gpu:1670.8603515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
62 |
+
Step 46 | loss:0.8292020559310913 lr:1e-05 tokens_per_second_per_gpu:9480.060546875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
63 |
+
Step 47 | loss:0.8125108480453491 lr:1e-05 tokens_per_second_per_gpu:9438.6904296875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
64 |
+
Step 48 | loss:0.8038235902786255 lr:1e-05 tokens_per_second_per_gpu:9451.693359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
65 |
+
Step 49 | loss:0.8112599849700928 lr:1e-05 tokens_per_second_per_gpu:1674.599853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
66 |
+
Step 50 | loss:0.7910869717597961 lr:1e-05 tokens_per_second_per_gpu:9486.93359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
67 |
+
Step 51 | loss:0.8089133501052856 lr:1e-05 tokens_per_second_per_gpu:9471.6611328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
68 |
+
Step 52 | loss:0.7902357578277588 lr:1e-05 tokens_per_second_per_gpu:9469.2421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
69 |
+
Step 53 | loss:0.7818056344985962 lr:1e-05 tokens_per_second_per_gpu:1700.8087158203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
70 |
+
Step 54 | loss:0.7916420698165894 lr:1e-05 tokens_per_second_per_gpu:9477.4853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
71 |
+
Step 55 | loss:0.7897423505783081 lr:1e-05 tokens_per_second_per_gpu:9472.4951171875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
72 |
+
Step 56 | loss:0.786994218826294 lr:1e-05 tokens_per_second_per_gpu:9428.8291015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
73 |
+
Step 57 | loss:0.7760741114616394 lr:1e-05 tokens_per_second_per_gpu:1659.400390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
74 |
+
Step 58 | loss:0.7682285308837891 lr:1e-05 tokens_per_second_per_gpu:9484.841796875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
75 |
+
Step 59 | loss:0.7774873971939087 lr:1e-05 tokens_per_second_per_gpu:9465.6552734375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
76 |
+
Step 60 | loss:0.7679336071014404 lr:1e-05 tokens_per_second_per_gpu:9458.9755859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
77 |
+
Step 61 | loss:0.7739102244377136 lr:1e-05 tokens_per_second_per_gpu:1685.2352294921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
78 |
+
Step 62 | loss:0.7486407160758972 lr:1e-05 tokens_per_second_per_gpu:9491.0732421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
79 |
+
Step 63 | loss:0.7589751482009888 lr:1e-05 tokens_per_second_per_gpu:9478.44921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
80 |
+
Step 64 | loss:0.7364526987075806 lr:1e-05 tokens_per_second_per_gpu:9495.0029296875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
81 |
+
Step 65 | loss:0.7489485144615173 lr:1e-05 tokens_per_second_per_gpu:1672.576416015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
82 |
+
Step 66 | loss:0.734795868396759 lr:1e-05 tokens_per_second_per_gpu:9490.7041015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
83 |
+
Step 67 | loss:0.7311883568763733 lr:1e-05 tokens_per_second_per_gpu:9467.4345703125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
84 |
+
Step 68 | loss:0.7330002784729004 lr:1e-05 tokens_per_second_per_gpu:9487.4697265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
85 |
+
Step 69 | loss:0.7234662771224976 lr:1e-05 tokens_per_second_per_gpu:1654.3092041015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
86 |
+
Step 70 | loss:0.7276259064674377 lr:1e-05 tokens_per_second_per_gpu:9499.0361328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
87 |
+
Step 71 | loss:0.7203803658485413 lr:1e-05 tokens_per_second_per_gpu:9486.548828125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
88 |
+
Step 72 | loss:0.707329511642456 lr:1e-05 tokens_per_second_per_gpu:9480.2265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
89 |
+
Step 73 | loss:0.7151223421096802 lr:1e-05 tokens_per_second_per_gpu:1670.900146484375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
90 |
+
Step 74 | loss:0.7061191201210022 lr:1e-05 tokens_per_second_per_gpu:9475.5166015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
91 |
+
Step 75 | loss:0.7015465497970581 lr:1e-05 tokens_per_second_per_gpu:9495.412109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
92 |
+
Step 76 | loss:0.6949933171272278 lr:1e-05 tokens_per_second_per_gpu:9412.3017578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
93 |
+
Step 77 | loss:0.6848528385162354 lr:1e-05 tokens_per_second_per_gpu:1647.98291015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
94 |
+
Step 78 | loss:0.6901313066482544 lr:1e-05 tokens_per_second_per_gpu:9489.111328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
95 |
+
Step 79 | loss:0.668023943901062 lr:1e-05 tokens_per_second_per_gpu:9468.5009765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
96 |
+
Step 80 | loss:0.6806164979934692 lr:1e-05 tokens_per_second_per_gpu:9474.2802734375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
|
97 |
+
```
|