Further pre-training to 2650K steps
Browse files
- config.gin +1 -1
- config.json +3 -2
- flax_model.msgpack +2 -2
- generation_config.json +7 -0
- model.safetensors +3 -0
- train/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.0.v2 +2 -2
- train/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.0.v2 +3 -0
- train/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.0.v2 +3 -0
- train/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.0.v2 +3 -0
- train/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.0.v2 +3 -0
- train/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.0.v2 +3 -0
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.1.v2 +2 -2
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.1.v2 +3 -0
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.1.v2 +3 -0
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.1.v2 +3 -0
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.1.v2 +3 -0
- training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.1.v2 +3 -0
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.2.v2 +2 -2
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.2.v2 +3 -0
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.2.v2 +3 -0
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.2.v2 +3 -0
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.2.v2 +3 -0
- training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.2.v2 +3 -0
config.gin
CHANGED
@@ -26,7 +26,7 @@ OPTIMIZER = @adafactor.Adafactor()
|
|
26 |
RANDOM_SEED = None
|
27 |
SHUFFLE_TRAIN_EXAMPLES = True
|
28 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
29 |
-
TRAIN_STEPS =
|
30 |
USE_CACHED_TASKS = False
|
31 |
USE_HARDWARE_RNG = False
|
32 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
|
|
26 |
RANDOM_SEED = None
|
27 |
SHUFFLE_TRAIN_EXAMPLES = True
|
28 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
29 |
+
TRAIN_STEPS = 4000000
|
30 |
USE_CACHED_TASKS = False
|
31 |
USE_HARDWARE_RNG = False
|
32 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
config.json
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
|
|
6 |
"d_ff": 2816,
|
7 |
"d_kv": 64,
|
8 |
"d_model": 1024,
|
@@ -26,7 +27,7 @@
|
|
26 |
"relative_attention_num_buckets": 32,
|
27 |
"tie_word_embeddings": false,
|
28 |
"torch_dtype": "float32",
|
29 |
-
"transformers_version": "4.
|
30 |
"use_cache": true,
|
31 |
"vocab_size": 32128
|
32 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "hf/ul2-large-dutch-english",
|
3 |
"architectures": [
|
4 |
"T5ForConditionalGeneration"
|
5 |
],
|
6 |
+
"classifier_dropout": 0.0,
|
7 |
"d_ff": 2816,
|
8 |
"d_kv": 64,
|
9 |
"d_model": 1024,
|
|
|
27 |
"relative_attention_num_buckets": 32,
|
28 |
"tie_word_embeddings": false,
|
29 |
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.35.0",
|
31 |
"use_cache": true,
|
32 |
"vocab_size": 32128
|
33 |
}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a77bb6fd4c648157c927ec672b72e04949ca3f695828e176c04f2442e3a7833
|
3 |
+
size 1632372682
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"decoder_start_token_id": 0,
|
4 |
+
"eos_token_id": 1,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.35.0"
|
7 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:107a338f3dee570eafa21672271f84a71da0de9600ad03ebd78d9f940a4559c4
|
3 |
+
size 3132668808
|
train/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.0.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2c0d700c69afe0c0a873d63451c08ea30f08c6ec9456d41d8d4a5f31e949307
|
3 |
+
size 9937135
|
train/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf52561a559563b77e0f01ff8a70a5cc9a17d90fdcf3f78201d8343e4ff80d0b
|
3 |
+
size 1066848
|
train/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64c910f3937c1ec93baacf7eb54dfc85891b01344ed0a55711349f13d27478e3
|
3 |
+
size 15879626
|
train/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49ea465f4d3a7a1fea16be708b30ea5aad00c69057940c8bb1c5604e32a3a337
|
3 |
+
size 3978579
|
train/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38a56bcaf345d286a33a9283dc140864a5c2680aac4a1fa7021dc9db15f4b8ce
|
3 |
+
size 6239
|
train/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99673475a7912c95b0ed04d99a11dfe4e9d76d0e776df1b8e2a0e8fa1a0f9eaa
|
3 |
+
size 13750542
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.1.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f59a8a85db93f10201d68a78baff68f5abd821520750778d6d4301a0ab70ea70
|
3 |
+
size 439876
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5715b86a8345b059f0a3d1128db621530fea2d7165406aa9e31d01f63f2ab07e
|
3 |
+
size 45898
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d00750332b6115e4b47276ca4b89f2f3197253d33961b9c5308a8c9fbaabe27
|
3 |
+
size 702005
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d238f56ef25ff63769a1094dd2fe13d94356f21ed65df48c76c66705c5bd723a
|
3 |
+
size 176064
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447929fb483a1408be0451ef62a8c684588151666cc21b0a8847b1be5088f89a
|
3 |
+
size 78
|
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86d90034bbc198eb909cc1180cd377e34419ca1057c7be22971aa66bf5f0ceb1
|
3 |
+
size 607916
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.2.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dda1358dbcb0156e3e5b1a6ea8357f02ea138b0b56d8e1d5fa2c07d3bfe1bea4
|
3 |
+
size 439876
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.2.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a2c33419b829fa990b581a9b0231c667c11aa9af91a47c0504a4de459b182e5
|
3 |
+
size 45898
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.2.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51843701c6d6991ebc37e2567d6e3fe191e7eead6c10730051ddae12409d7569
|
3 |
+
size 702005
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.2.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:829b787b512e1e20636c4c37c4d839e863096e26e9ccf324272360cd2ce9a3fb
|
3 |
+
size 176064
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.2.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:447929fb483a1408be0451ef62a8c684588151666cc21b0a8847b1be5088f89a
|
3 |
+
size 78
|
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.2.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7bd872b5f4ba0bcd1e0b95cb38cab1cb9ee16a8f37c3299f68e2d8823dad200
|
3 |
+
size 607916
|