Training in progress epoch 0

Files changed (4) hide show

README.md CHANGED Viewed

@@ -1,5 +1,4 @@
 ---
-license: apache-2.0
 tags:
 - generated_from_keras_callback
 model-index:
@@ -12,10 +11,10 @@ probably proofread and complete it, then remove this comment. -->
 # dosai/bert-sudb
-This model is a fine-tuned version of [bert-base-multilingual-uncased](https://huggingface.co/bert-base-multilingual-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Train Loss: 5.9514
-- Epoch: 7
 ## Model description
@@ -41,19 +40,12 @@ The following hyperparameters were used during training:
 | Train Loss | Epoch |
 |:----------:|:-----:|
-| 5.9578     | 0     |
-| 5.9547     | 1     |
-| 5.9542     | 2     |
-| 5.9526     | 3     |
-| 5.9522     | 4     |
-| 5.9520     | 5     |
-| 5.9515     | 6     |
-| 5.9514     | 7     |
 ### Framework versions
-- Transformers 4.29.2
 - TensorFlow 2.12.0
 - Datasets 2.12.0
 - Tokenizers 0.13.3

 ---
 tags:
 - generated_from_keras_callback
 model-index:
 # dosai/bert-sudb
+This model is a fine-tuned version of [alon-albalak/bert-base-multilingual-xquad](https://huggingface.co/alon-albalak/bert-base-multilingual-xquad) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Train Loss: 5.9549
+- Epoch: 0
 ## Model description
 | Train Loss | Epoch |
 |:----------:|:-----:|
+| 5.9549     | 0     |
 ### Framework versions
+- Transformers 4.30.0
 - TensorFlow 2.12.0
 - Datasets 2.12.0
 - Tokenizers 0.13.3

config.json CHANGED Viewed

@@ -1,11 +1,12 @@
 {
-  "_name_or_path": "bert-base-multilingual-uncased",
   "architectures": [
     "BertForQuestionAnswering"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "directionality": "bidi",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
@@ -23,7 +24,7 @@
   "pooler_size_per_head": 128,
   "pooler_type": "first_token_transform",
   "position_embedding_type": "absolute",
-  "transformers_version": "4.29.2",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 105879

 {
+  "_name_or_path": "alon-albalak/bert-base-multilingual-xquad",
   "architectures": [
     "BertForQuestionAnswering"
   ],
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "directionality": "bidi",
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "pooler_size_per_head": 128,
   "pooler_type": "first_token_transform",
   "position_embedding_type": "absolute",
+  "transformers_version": "4.30.0",
   "type_vocab_size": 2,
   "use_cache": true,
   "vocab_size": 105879

tf_model.h5 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aff3729e311f72630613f3df49b436b1972a4cd394fd5ea04592958edd5384b0
 size 667338704

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1512574c243da9578d9b7586146c7ceb1249726a201a0b51b10b60bbe101fed
 size 667338704

tokenizer.json CHANGED Viewed

@@ -1,7 +1,21 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
   "added_tokens": [
     {
       "id": 0,

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 384,
+    "strategy": "OnlySecond",
+    "stride": 128
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 384
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
     {
       "id": 0,