Safetensors
llama
amang1802 committed on
Commit
d2821f2
·
verified ·
1 Parent(s): 7fdc354

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +97 -0
README.md ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - amang1802/synthetic_data_qna_fulltext_conditioned_L3.3_70B_deduped
5
+ base_model:
6
+ - meta-llama/Llama-3.1-8B
7
+ ---
8
+
9
+ - Trained using torchtune for continued pre-training (CPT) testing
10
+ - Shows a good improvement in ground-truth accuracy when using a Q&A dataset instead of just wiki-like text: from 23% to 31%
11
+
12
+ ---
13
+
14
+ Torchtune logs
15
+
16
+ ```
17
+ Step 1 | loss:1.1550652980804443 lr:1e-05 tokens_per_second_per_gpu:7858.67578125 peak_memory_active:78.02358818054199 peak_memory_alloc:78.02358818054199 peak_memory_reserved:100.0546875
18
+ Step 2 | loss:1.074491024017334 lr:1e-05 tokens_per_second_per_gpu:9497.6376953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
19
+ Step 3 | loss:1.036757230758667 lr:1e-05 tokens_per_second_per_gpu:9443.421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
20
+ Step 4 | loss:1.0045448541641235 lr:1e-05 tokens_per_second_per_gpu:9472.8544921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
21
+ Step 5 | loss:0.9949342012405396 lr:1e-05 tokens_per_second_per_gpu:1754.5042724609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
22
+ Step 6 | loss:0.9762706756591797 lr:1e-05 tokens_per_second_per_gpu:9503.0185546875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
23
+ Step 7 | loss:0.9750452637672424 lr:1e-05 tokens_per_second_per_gpu:9493.833984375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
24
+ Step 8 | loss:0.9748913049697876 lr:1e-05 tokens_per_second_per_gpu:9464.8017578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
25
+ Step 9 | loss:0.9671335816383362 lr:1e-05 tokens_per_second_per_gpu:1682.2664794921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
26
+ Step 10 | loss:0.9558936953544617 lr:1e-05 tokens_per_second_per_gpu:9502.4892578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
27
+ Step 11 | loss:0.9583306312561035 lr:1e-05 tokens_per_second_per_gpu:9477.5849609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
28
+ Step 12 | loss:0.93567955493927 lr:1e-05 tokens_per_second_per_gpu:9432.6015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
29
+ Step 13 | loss:0.9410796761512756 lr:1e-05 tokens_per_second_per_gpu:1682.098388671875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
30
+ Step 14 | loss:0.9390289783477783 lr:1e-05 tokens_per_second_per_gpu:9453.6650390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
31
+ Step 15 | loss:0.926663875579834 lr:1e-05 tokens_per_second_per_gpu:9473.9384765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
32
+ Step 16 | loss:0.927723228931427 lr:1e-05 tokens_per_second_per_gpu:9470.412109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
33
+ Step 17 | loss:0.920567512512207 lr:1e-05 tokens_per_second_per_gpu:1708.1058349609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
34
+ Step 18 | loss:0.9150664210319519 lr:1e-05 tokens_per_second_per_gpu:9478.974609375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
35
+ Step 19 | loss:0.924900472164154 lr:1e-05 tokens_per_second_per_gpu:9455.2490234375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
36
+ Step 20 | loss:0.9017823934555054 lr:1e-05 tokens_per_second_per_gpu:9454.6162109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
37
+ Step 21 | loss:0.9159845113754272 lr:1e-05 tokens_per_second_per_gpu:1662.1505126953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
38
+ Step 22 | loss:0.8888995051383972 lr:1e-05 tokens_per_second_per_gpu:9452.20703125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
39
+ Step 23 | loss:0.8926591873168945 lr:1e-05 tokens_per_second_per_gpu:9485.6240234375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
40
+ Step 24 | loss:0.886618971824646 lr:1e-05 tokens_per_second_per_gpu:9464.8369140625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
41
+ Step 25 | loss:0.892602801322937 lr:1e-05 tokens_per_second_per_gpu:1685.84033203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
42
+ Step 26 | loss:0.8850957751274109 lr:1e-05 tokens_per_second_per_gpu:9490.6328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
43
+ Step 27 | loss:0.8848649263381958 lr:1e-05 tokens_per_second_per_gpu:9457.08203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
44
+ Step 28 | loss:0.8875665068626404 lr:1e-05 tokens_per_second_per_gpu:9455.3544921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
45
+ Step 29 | loss:0.8806608319282532 lr:1e-05 tokens_per_second_per_gpu:1645.5791015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
46
+ Step 30 | loss:0.8724529147148132 lr:1e-05 tokens_per_second_per_gpu:9455.6884765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
47
+ Step 31 | loss:0.8731796741485596 lr:1e-05 tokens_per_second_per_gpu:9487.953125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
48
+ Step 32 | loss:0.8599528074264526 lr:1e-05 tokens_per_second_per_gpu:9456.908203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
49
+ Step 33 | loss:0.8691402673721313 lr:1e-05 tokens_per_second_per_gpu:1703.8731689453125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
50
+ Step 34 | loss:0.8764164447784424 lr:1e-05 tokens_per_second_per_gpu:9494.8505859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
51
+ Step 35 | loss:0.8447176814079285 lr:1e-05 tokens_per_second_per_gpu:9463.6650390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
52
+ Step 36 | loss:0.8470439314842224 lr:1e-05 tokens_per_second_per_gpu:9428.8984375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
53
+ Step 37 | loss:0.8530339002609253 lr:1e-05 tokens_per_second_per_gpu:1679.0570068359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
54
+ Step 38 | loss:0.8404569625854492 lr:1e-05 tokens_per_second_per_gpu:9484.4853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
55
+ Step 39 | loss:0.838159441947937 lr:1e-05 tokens_per_second_per_gpu:9439.072265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
56
+ Step 40 | loss:0.8427407741546631 lr:1e-05 tokens_per_second_per_gpu:9468.4130859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
57
+ Step 41 | loss:0.8266391754150391 lr:1e-05 tokens_per_second_per_gpu:1667.6983642578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
58
+ Step 42 | loss:0.8309320211410522 lr:1e-05 tokens_per_second_per_gpu:9479.7646484375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
59
+ Step 43 | loss:0.8297097086906433 lr:1e-05 tokens_per_second_per_gpu:9458.6044921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
60
+ Step 44 | loss:0.8245887756347656 lr:1e-05 tokens_per_second_per_gpu:9457.189453125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
61
+ Step 45 | loss:0.805065929889679 lr:1e-05 tokens_per_second_per_gpu:1670.8603515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
62
+ Step 46 | loss:0.8292020559310913 lr:1e-05 tokens_per_second_per_gpu:9480.060546875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
63
+ Step 47 | loss:0.8125108480453491 lr:1e-05 tokens_per_second_per_gpu:9438.6904296875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
64
+ Step 48 | loss:0.8038235902786255 lr:1e-05 tokens_per_second_per_gpu:9451.693359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
65
+ Step 49 | loss:0.8112599849700928 lr:1e-05 tokens_per_second_per_gpu:1674.599853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
66
+ Step 50 | loss:0.7910869717597961 lr:1e-05 tokens_per_second_per_gpu:9486.93359375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
67
+ Step 51 | loss:0.8089133501052856 lr:1e-05 tokens_per_second_per_gpu:9471.6611328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
68
+ Step 52 | loss:0.7902357578277588 lr:1e-05 tokens_per_second_per_gpu:9469.2421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
69
+ Step 53 | loss:0.7818056344985962 lr:1e-05 tokens_per_second_per_gpu:1700.8087158203125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
70
+ Step 54 | loss:0.7916420698165894 lr:1e-05 tokens_per_second_per_gpu:9477.4853515625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
71
+ Step 55 | loss:0.7897423505783081 lr:1e-05 tokens_per_second_per_gpu:9472.4951171875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
72
+ Step 56 | loss:0.786994218826294 lr:1e-05 tokens_per_second_per_gpu:9428.8291015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
73
+ Step 57 | loss:0.7760741114616394 lr:1e-05 tokens_per_second_per_gpu:1659.400390625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
74
+ Step 58 | loss:0.7682285308837891 lr:1e-05 tokens_per_second_per_gpu:9484.841796875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
75
+ Step 59 | loss:0.7774873971939087 lr:1e-05 tokens_per_second_per_gpu:9465.6552734375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
76
+ Step 60 | loss:0.7679336071014404 lr:1e-05 tokens_per_second_per_gpu:9458.9755859375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
77
+ Step 61 | loss:0.7739102244377136 lr:1e-05 tokens_per_second_per_gpu:1685.2352294921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
78
+ Step 62 | loss:0.7486407160758972 lr:1e-05 tokens_per_second_per_gpu:9491.0732421875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
79
+ Step 63 | loss:0.7589751482009888 lr:1e-05 tokens_per_second_per_gpu:9478.44921875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
80
+ Step 64 | loss:0.7364526987075806 lr:1e-05 tokens_per_second_per_gpu:9495.0029296875 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
81
+ Step 65 | loss:0.7489485144615173 lr:1e-05 tokens_per_second_per_gpu:1672.576416015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
82
+ Step 66 | loss:0.734795868396759 lr:1e-05 tokens_per_second_per_gpu:9490.7041015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
83
+ Step 67 | loss:0.7311883568763733 lr:1e-05 tokens_per_second_per_gpu:9467.4345703125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
84
+ Step 68 | loss:0.7330002784729004 lr:1e-05 tokens_per_second_per_gpu:9487.4697265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
85
+ Step 69 | loss:0.7234662771224976 lr:1e-05 tokens_per_second_per_gpu:1654.3092041015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
86
+ Step 70 | loss:0.7276259064674377 lr:1e-05 tokens_per_second_per_gpu:9499.0361328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
87
+ Step 71 | loss:0.7203803658485413 lr:1e-05 tokens_per_second_per_gpu:9486.548828125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
88
+ Step 72 | loss:0.707329511642456 lr:1e-05 tokens_per_second_per_gpu:9480.2265625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
89
+ Step 73 | loss:0.7151223421096802 lr:1e-05 tokens_per_second_per_gpu:1670.900146484375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
90
+ Step 74 | loss:0.7061191201210022 lr:1e-05 tokens_per_second_per_gpu:9475.5166015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
91
+ Step 75 | loss:0.7015465497970581 lr:1e-05 tokens_per_second_per_gpu:9495.412109375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
92
+ Step 76 | loss:0.6949933171272278 lr:1e-05 tokens_per_second_per_gpu:9412.3017578125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
93
+ Step 77 | loss:0.6848528385162354 lr:1e-05 tokens_per_second_per_gpu:1647.98291015625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
94
+ Step 78 | loss:0.6901313066482544 lr:1e-05 tokens_per_second_per_gpu:9489.111328125 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
95
+ Step 79 | loss:0.668023943901062 lr:1e-05 tokens_per_second_per_gpu:9468.5009765625 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
96
+ Step 80 | loss:0.6806164979934692 lr:1e-05 tokens_per_second_per_gpu:9474.2802734375 peak_memory_active:85.53374147415161 peak_memory_alloc:85.53374147415161 peak_memory_reserved:104.0546875
97
+ ```