SebastianBodza
/

Kartoffel-1B-v0.3

Safetensors

German

llama

Model card Files Files and versions Community

SebastianBodza commited on Feb 7

Commit

86de3ec

verified ·

1 Parent(s): 81a23b6

Add files using upload-large-folder tool

Browse files

Files changed (2) hide show

model.safetensors +1 -1
trainer_state.json +711 -3

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7dbc119d572de9d50e04e2f7f782a2227240cda1ef8b3619fb1b372581bcdca5
 size 2740113872

 version https://git-lfs.github.com/spec/v1
+oid sha256:251592d6646f19c540dca28cb759c20bbddf9b68e86c390c0300386f84e83a8c
 size 2740113872

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.21701141657213197,
   "eval_steps": 1000,
-  "global_step": 2997,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2124,6 +2124,714 @@
       "learning_rate": 4.557779879023483e-05,
       "loss": 6.5202,
       "step": 2990
     }
   ],
   "logging_steps": 10,
@@ -2143,7 +2851,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.450873098647372e+18,
   "train_batch_size": 30,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2893485554295093,
   "eval_steps": 1000,
+  "global_step": 3996,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.557779879023483e-05,
       "loss": 6.5202,
       "step": 2990
+    },
+    {
+      "epoch": 0.21722864521734933,
+      "grad_norm": 1.328125,
+      "learning_rate": 4.554444538063113e-05,
+      "loss": 6.5246,
+      "step": 3000
+    },
+    {
+      "epoch": 0.21722864521734933,
+      "eval_loss": 6.682950973510742,
+      "eval_runtime": 1.4758,
+      "eval_samples_per_second": 8.131,
+      "eval_steps_per_second": 2.033,
+      "step": 3000
+    },
+    {
+      "epoch": 0.21795274070140716,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.551097896319838e-05,
+      "loss": 6.5144,
+      "step": 3010
+    },
+    {
+      "epoch": 0.218676836185465,
+      "grad_norm": 1.3671875,
+      "learning_rate": 4.5477399722023674e-05,
+      "loss": 6.5295,
+      "step": 3020
+    },
+    {
+      "epoch": 0.21940093166952282,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.5443707841814695e-05,
+      "loss": 6.5278,
+      "step": 3030
+    },
+    {
+      "epoch": 0.22012502715358065,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.540990350789875e-05,
+      "loss": 6.5584,
+      "step": 3040
+    },
+    {
+      "epoch": 0.22084912263763848,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.537598690622167e-05,
+      "loss": 6.5528,
+      "step": 3050
+    },
+    {
+      "epoch": 0.2215732181216963,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.534195822334686e-05,
+      "loss": 6.5392,
+      "step": 3060
+    },
+    {
+      "epoch": 0.22229731360575414,
+      "grad_norm": 1.3984375,
+      "learning_rate": 4.530781764645422e-05,
+      "loss": 6.5403,
+      "step": 3070
+    },
+    {
+      "epoch": 0.22302140908981197,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.5273565363339185e-05,
+      "loss": 6.5396,
+      "step": 3080
+    },
+    {
+      "epoch": 0.2237455045738698,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.52392015624116e-05,
+      "loss": 6.5249,
+      "step": 3090
+    },
+    {
+      "epoch": 0.22446960005792763,
+      "grad_norm": 1.25,
+      "learning_rate": 4.520472643269477e-05,
+      "loss": 6.5199,
+      "step": 3100
+    },
+    {
+      "epoch": 0.22519369554198548,
+      "grad_norm": 1.375,
+      "learning_rate": 4.517014016382432e-05,
+      "loss": 6.5302,
+      "step": 3110
+    },
+    {
+      "epoch": 0.2259177910260433,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.5135442946047294e-05,
+      "loss": 6.5274,
+      "step": 3120
+    },
+    {
+      "epoch": 0.22664188651010114,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.5100634970220967e-05,
+      "loss": 6.5431,
+      "step": 3130
+    },
+    {
+      "epoch": 0.22736598199415897,
+      "grad_norm": 1.2421875,
+      "learning_rate": 4.5065716427811874e-05,
+      "loss": 6.5362,
+      "step": 3140
+    },
+    {
+      "epoch": 0.2280900774782168,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.503068751089474e-05,
+      "loss": 6.5307,
+      "step": 3150
+    },
+    {
+      "epoch": 0.22881417296227463,
+      "grad_norm": 1.296875,
+      "learning_rate": 4.499554841215143e-05,
+      "loss": 6.5172,
+      "step": 3160
+    },
+    {
+      "epoch": 0.22953826844633246,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.496029932486986e-05,
+      "loss": 6.521,
+      "step": 3170
+    },
+    {
+      "epoch": 0.2302623639303903,
+      "grad_norm": 1.1796875,
+      "learning_rate": 4.492494044294297e-05,
+      "loss": 6.5346,
+      "step": 3180
+    },
+    {
+      "epoch": 0.23098645941444812,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.4889471960867635e-05,
+      "loss": 6.5422,
+      "step": 3190
+    },
+    {
+      "epoch": 0.23171055489850595,
+      "grad_norm": 1.34375,
+      "learning_rate": 4.485389407374361e-05,
+      "loss": 6.5498,
+      "step": 3200
+    },
+    {
+      "epoch": 0.23243465038256378,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.481820697727244e-05,
+      "loss": 6.523,
+      "step": 3210
+    },
+    {
+      "epoch": 0.2331587458666216,
+      "grad_norm": 1.1875,
+      "learning_rate": 4.47824108677564e-05,
+      "loss": 6.5477,
+      "step": 3220
+    },
+    {
+      "epoch": 0.23388284135067944,
+      "grad_norm": 1.3359375,
+      "learning_rate": 4.47465059420974e-05,
+      "loss": 6.5379,
+      "step": 3230
+    },
+    {
+      "epoch": 0.23460693683473727,
+      "grad_norm": 1.25,
+      "learning_rate": 4.471049239779592e-05,
+      "loss": 6.5389,
+      "step": 3240
+    },
+    {
+      "epoch": 0.2353310323187951,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.4674370432949905e-05,
+      "loss": 6.5552,
+      "step": 3250
+    },
+    {
+      "epoch": 0.23605512780285293,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.463814024625368e-05,
+      "loss": 6.5114,
+      "step": 3260
+    },
+    {
+      "epoch": 0.23677922328691076,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.460180203699688e-05,
+      "loss": 6.5101,
+      "step": 3270
+    },
+    {
+      "epoch": 0.23750331877096859,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.4565356005063304e-05,
+      "loss": 6.5051,
+      "step": 3280
+    },
+    {
+      "epoch": 0.23822741425502644,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.452880235092987e-05,
+      "loss": 6.5213,
+      "step": 3290
+    },
+    {
+      "epoch": 0.23895150973908427,
+      "grad_norm": 1.25,
+      "learning_rate": 4.449214127566549e-05,
+      "loss": 6.5246,
+      "step": 3300
+    },
+    {
+      "epoch": 0.2396756052231421,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.4455372980929935e-05,
+      "loss": 6.5309,
+      "step": 3310
+    },
+    {
+      "epoch": 0.24039970070719993,
+      "grad_norm": 1.359375,
+      "learning_rate": 4.4418497668972785e-05,
+      "loss": 6.5349,
+      "step": 3320
+    },
+    {
+      "epoch": 0.24112379619125776,
+      "grad_norm": 1.2421875,
+      "learning_rate": 4.4381515542632274e-05,
+      "loss": 6.5232,
+      "step": 3330
+    },
+    {
+      "epoch": 0.2418478916753156,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.434442680533417e-05,
+      "loss": 6.518,
+      "step": 3340
+    },
+    {
+      "epoch": 0.24257198715937342,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.430723166109069e-05,
+      "loss": 6.5465,
+      "step": 3350
+    },
+    {
+      "epoch": 0.24329608264343125,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.426993031449934e-05,
+      "loss": 6.5353,
+      "step": 3360
+    },
+    {
+      "epoch": 0.24402017812748908,
+      "grad_norm": 1.234375,
+      "learning_rate": 4.423252297074183e-05,
+      "loss": 6.5499,
+      "step": 3370
+    },
+    {
+      "epoch": 0.2447442736115469,
+      "grad_norm": 1.3203125,
+      "learning_rate": 4.41950098355829e-05,
+      "loss": 6.5203,
+      "step": 3380
+    },
+    {
+      "epoch": 0.24546836909560474,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.415739111536924e-05,
+      "loss": 6.5392,
+      "step": 3390
+    },
+    {
+      "epoch": 0.24619246457966257,
+      "grad_norm": 1.3515625,
+      "learning_rate": 4.4119667017028297e-05,
+      "loss": 6.5197,
+      "step": 3400
+    },
+    {
+      "epoch": 0.2469165600637204,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.4081837748067186e-05,
+      "loss": 6.4986,
+      "step": 3410
+    },
+    {
+      "epoch": 0.24764065554777823,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.404390351657153e-05,
+      "loss": 6.5238,
+      "step": 3420
+    },
+    {
+      "epoch": 0.24836475103183606,
+      "grad_norm": 1.25,
+      "learning_rate": 4.4005864531204285e-05,
+      "loss": 6.5473,
+      "step": 3430
+    },
+    {
+      "epoch": 0.24908884651589389,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.396772100120466e-05,
+      "loss": 6.5189,
+      "step": 3440
+    },
+    {
+      "epoch": 0.24981294199995172,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.39294731363869e-05,
+      "loss": 6.5228,
+      "step": 3450
+    },
+    {
+      "epoch": 0.2505370374840096,
+      "grad_norm": 1.296875,
+      "learning_rate": 4.389112114713918e-05,
+      "loss": 6.5224,
+      "step": 3460
+    },
+    {
+      "epoch": 0.2512611329680674,
+      "grad_norm": 1.3359375,
+      "learning_rate": 4.385266524442241e-05,
+      "loss": 6.5229,
+      "step": 3470
+    },
+    {
+      "epoch": 0.25198522845212523,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.3814105639769106e-05,
+      "loss": 6.54,
+      "step": 3480
+    },
+    {
+      "epoch": 0.25270932393618306,
+      "grad_norm": 1.34375,
+      "learning_rate": 4.37754425452822e-05,
+      "loss": 6.5226,
+      "step": 3490
+    },
+    {
+      "epoch": 0.2534334194202409,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.373667617363389e-05,
+      "loss": 6.5276,
+      "step": 3500
+    },
+    {
+      "epoch": 0.2541575149042987,
+      "grad_norm": 1.3828125,
+      "learning_rate": 4.369780673806447e-05,
+      "loss": 6.534,
+      "step": 3510
+    },
+    {
+      "epoch": 0.25488161038835655,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.365883445238116e-05,
+      "loss": 6.5317,
+      "step": 3520
+    },
+    {
+      "epoch": 0.2556057058724144,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.361975953095689e-05,
+      "loss": 6.5119,
+      "step": 3530
+    },
+    {
+      "epoch": 0.2563298013564722,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.358058218872918e-05,
+      "loss": 6.5281,
+      "step": 3540
+    },
+    {
+      "epoch": 0.25705389684053004,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.354130264119894e-05,
+      "loss": 6.5284,
+      "step": 3550
+    },
+    {
+      "epoch": 0.25777799232458787,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.350192110442926e-05,
+      "loss": 6.5324,
+      "step": 3560
+    },
+    {
+      "epoch": 0.2585020878086457,
+      "grad_norm": 1.2265625,
+      "learning_rate": 4.346243779504421e-05,
+      "loss": 6.523,
+      "step": 3570
+    },
+    {
+      "epoch": 0.2592261832927035,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.342285293022775e-05,
+      "loss": 6.5267,
+      "step": 3580
+    },
+    {
+      "epoch": 0.25995027877676136,
+      "grad_norm": 1.2265625,
+      "learning_rate": 4.338316672772238e-05,
+      "loss": 6.5263,
+      "step": 3590
+    },
+    {
+      "epoch": 0.2606743742608192,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.334337940582808e-05,
+      "loss": 6.5438,
+      "step": 3600
+    },
+    {
+      "epoch": 0.261398469744877,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.330349118340102e-05,
+      "loss": 6.5315,
+      "step": 3610
+    },
+    {
+      "epoch": 0.26212256522893485,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.326350227985241e-05,
+      "loss": 6.5183,
+      "step": 3620
+    },
+    {
+      "epoch": 0.2628466607129927,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.3223412915147254e-05,
+      "loss": 6.5214,
+      "step": 3630
+    },
+    {
+      "epoch": 0.2635707561970505,
+      "grad_norm": 1.3515625,
+      "learning_rate": 4.318322330980317e-05,
+      "loss": 6.531,
+      "step": 3640
+    },
+    {
+      "epoch": 0.26429485168110833,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.314293368488915e-05,
+      "loss": 6.5133,
+      "step": 3650
+    },
+    {
+      "epoch": 0.26501894716516616,
+      "grad_norm": 1.25,
+      "learning_rate": 4.3102544262024394e-05,
+      "loss": 6.5288,
+      "step": 3660
+    },
+    {
+      "epoch": 0.265743042649224,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.3062055263377e-05,
+      "loss": 6.4998,
+      "step": 3670
+    },
+    {
+      "epoch": 0.2664671381332818,
+      "grad_norm": 1.3203125,
+      "learning_rate": 4.302146691166286e-05,
+      "loss": 6.5302,
+      "step": 3680
+    },
+    {
+      "epoch": 0.26719123361733965,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.298077943014431e-05,
+      "loss": 6.5053,
+      "step": 3690
+    },
+    {
+      "epoch": 0.2679153291013975,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.293999304262902e-05,
+      "loss": 6.5242,
+      "step": 3700
+    },
+    {
+      "epoch": 0.2686394245854553,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.289910797346868e-05,
+      "loss": 6.5061,
+      "step": 3710
+    },
+    {
+      "epoch": 0.26936352006951314,
+      "grad_norm": 1.25,
+      "learning_rate": 4.285812444755779e-05,
+      "loss": 6.5257,
+      "step": 3720
+    },
+    {
+      "epoch": 0.27008761555357097,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.281704269033242e-05,
+      "loss": 6.5269,
+      "step": 3730
+    },
+    {
+      "epoch": 0.27081171103762885,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.2775862927769025e-05,
+      "loss": 6.4974,
+      "step": 3740
+    },
+    {
+      "epoch": 0.2715358065216867,
+      "grad_norm": 1.3671875,
+      "learning_rate": 4.2734585386383086e-05,
+      "loss": 6.4876,
+      "step": 3750
+    },
+    {
+      "epoch": 0.2722599020057445,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.269321029322797e-05,
+      "loss": 6.5099,
+      "step": 3760
+    },
+    {
+      "epoch": 0.27298399748980234,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.265173787589364e-05,
+      "loss": 6.5222,
+      "step": 3770
+    },
+    {
+      "epoch": 0.2737080929738602,
+      "grad_norm": 1.2109375,
+      "learning_rate": 4.2610168362505395e-05,
+      "loss": 6.5077,
+      "step": 3780
+    },
+    {
+      "epoch": 0.274432188457918,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.256850198172263e-05,
+      "loss": 6.5283,
+      "step": 3790
+    },
+    {
+      "epoch": 0.27515628394197583,
+      "grad_norm": 1.171875,
+      "learning_rate": 4.252673896273758e-05,
+      "loss": 6.5135,
+      "step": 3800
+    },
+    {
+      "epoch": 0.27588037942603366,
+      "grad_norm": 1.203125,
+      "learning_rate": 4.248487953527404e-05,
+      "loss": 6.5123,
+      "step": 3810
+    },
+    {
+      "epoch": 0.2766044749100915,
+      "grad_norm": 1.21875,
+      "learning_rate": 4.244292392958613e-05,
+      "loss": 6.5163,
+      "step": 3820
+    },
+    {
+      "epoch": 0.2773285703941493,
+      "grad_norm": 1.3203125,
+      "learning_rate": 4.2400872376457e-05,
+      "loss": 6.5063,
+      "step": 3830
+    },
+    {
+      "epoch": 0.27805266587820715,
+      "grad_norm": 1.2734375,
+      "learning_rate": 4.2358725107197576e-05,
+      "loss": 6.5214,
+      "step": 3840
+    },
+    {
+      "epoch": 0.278776761362265,
+      "grad_norm": 1.3046875,
+      "learning_rate": 4.231648235364529e-05,
+      "loss": 6.5375,
+      "step": 3850
+    },
+    {
+      "epoch": 0.2795008568463228,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.227414434816279e-05,
+      "loss": 6.5003,
+      "step": 3860
+    },
+    {
+      "epoch": 0.28022495233038064,
+      "grad_norm": 1.234375,
+      "learning_rate": 4.22317113236367e-05,
+      "loss": 6.5425,
+      "step": 3870
+    },
+    {
+      "epoch": 0.28094904781443847,
+      "grad_norm": 1.1953125,
+      "learning_rate": 4.218918351347626e-05,
+      "loss": 6.5073,
+      "step": 3880
+    },
+    {
+      "epoch": 0.2816731432984963,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.214656115161215e-05,
+      "loss": 6.5127,
+      "step": 3890
+    },
+    {
+      "epoch": 0.2823972387825541,
+      "grad_norm": 1.2265625,
+      "learning_rate": 4.210384447249509e-05,
+      "loss": 6.5198,
+      "step": 3900
+    },
+    {
+      "epoch": 0.28312133426661196,
+      "grad_norm": 1.234375,
+      "learning_rate": 4.2061033711094655e-05,
+      "loss": 6.5206,
+      "step": 3910
+    },
+    {
+      "epoch": 0.2838454297506698,
+      "grad_norm": 1.3125,
+      "learning_rate": 4.2018129102897904e-05,
+      "loss": 6.4952,
+      "step": 3920
+    },
+    {
+      "epoch": 0.2845695252347276,
+      "grad_norm": 1.2265625,
+      "learning_rate": 4.197513088390813e-05,
+      "loss": 6.5065,
+      "step": 3930
+    },
+    {
+      "epoch": 0.28529362071878545,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.193203929064353e-05,
+      "loss": 6.4917,
+      "step": 3940
+    },
+    {
+      "epoch": 0.2860177162028433,
+      "grad_norm": 1.28125,
+      "learning_rate": 4.1888854560135934e-05,
+      "loss": 6.5252,
+      "step": 3950
+    },
+    {
+      "epoch": 0.2867418116869011,
+      "grad_norm": 1.2578125,
+      "learning_rate": 4.1845576929929486e-05,
+      "loss": 6.4974,
+      "step": 3960
+    },
+    {
+      "epoch": 0.28746590717095893,
+      "grad_norm": 1.234375,
+      "learning_rate": 4.180220663807934e-05,
+      "loss": 6.5253,
+      "step": 3970
+    },
+    {
+      "epoch": 0.28819000265501676,
+      "grad_norm": 1.265625,
+      "learning_rate": 4.175874392315033e-05,
+      "loss": 6.4986,
+      "step": 3980
+    },
+    {
+      "epoch": 0.2889140981390746,
+      "grad_norm": 1.3203125,
+      "learning_rate": 4.1715189024215716e-05,
+      "loss": 6.5147,
+      "step": 3990
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.601164131082437e+18,
   "train_batch_size": 30,
   "trial_name": null,
   "trial_params": null