Upload TFFlaubertForSequenceClassification

Browse files

Files changed (3) hide show

README.md +73 -0
config.json +91 -0
tf_model.h5 +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,73 @@

+---
+license: mit
+tags:
+- generated_from_keras_callback
+model-index:
+- name: Flaubert_1619
+  results: []
+---
+<!-- This model card has been generated automatically according to the information Keras had access to. You should
+probably proofread and complete it, then remove this comment. -->
+# Flaubert_1619
+This model is a fine-tuned version of [flaubert/flaubert_base_cased](https://huggingface.co/flaubert/flaubert_base_cased) on an unknown dataset.
+It achieves the following results on the training and evaluation sets at the final epoch:
+- Train Loss: 0.2458
+- Validation Loss: 0.5339
+- Train Accuracy: 0.8170
+- Epoch: 19
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- optimizer: {'name': 'Adam', 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'jit_compile': True, 'is_legacy_optimizer': False, 'learning_rate': {'class_name': 'PolynomialDecay', 'config': {'initial_learning_rate': 2e-05, 'decay_steps': 1432, 'end_learning_rate': 0.0, 'power': 1.0, 'cycle': False, 'name': None}}, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-08, 'amsgrad': False}
+- training_precision: float32
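+### Training results
+Note: the Validation Loss and Train Accuracy values are identical from epoch 3 onward while Train Loss keeps fluctuating. This is consistent with the `PolynomialDecay` schedule listed under the training hyperparameters reaching its `end_learning_rate` of 0.0 after `decay_steps` = 1432 steps, after which the weights would no longer update (TODO: confirm against the number of steps per epoch).
+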
+| Train Loss | Validation Loss | Train Accuracy | Epoch |
+|:----------:|:---------------:|:--------------:|:-----:|
+| 1.0000     | 0.5841          | 0.7769         | 0     |
+| 0.5526     | 0.5284          | 0.7845         | 1     |
+| 0.3909     | 0.4806          | 0.8221         | 2     |
+| 0.2798     | 0.5339          | 0.8170         | 3     |
+| 0.2378     | 0.5339          | 0.8170         | 4     |
+| 0.2514     | 0.5339          | 0.8170         | 5     |
+| 0.2403     | 0.5339          | 0.8170         | 6     |
+| 0.2373     | 0.5339          | 0.8170         | 7     |
+| 0.2441     | 0.5339          | 0.8170         | 8     |
+| 0.2529     | 0.5339          | 0.8170         | 9     |
+| 0.2400     | 0.5339          | 0.8170         | 10    |
+| 0.2337     | 0.5339          | 0.8170         | 11    |
+| 0.2394     | 0.5339          | 0.8170         | 12    |
+| 0.2383     | 0.5339          | 0.8170         | 13    |
+| 0.2464     | 0.5339          | 0.8170         | 14    |
+| 0.2464     | 0.5339          | 0.8170         | 15    |
+| 0.2468     | 0.5339          | 0.8170         | 16    |
+| 0.2427     | 0.5339          | 0.8170         | 17    |
+| 0.2546     | 0.5339          | 0.8170         | 18    |
+| 0.2458     | 0.5339          | 0.8170         | 19    |
+### Framework versions
+- Transformers 4.30.2
+- TensorFlow 2.12.0
+- Datasets 2.12.0
+- Tokenizers 0.13.3

config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "_name_or_path": "flaubert/flaubert_base_cased",
+  "amp": 1,
+  "architectures": [
+    "FlaubertForSequenceClassification"
+  ],
+  "asm": false,
+  "attention_dropout": 0.1,
+  "bos_index": 0,
+  "bos_token_id": 0,
+  "bptt": 512,
+  "causal": false,
+  "clip_grad_norm": 5,
+  "dropout": 0.1,
+  "emb_dim": 768,
+  "embed_init_std": 0.02209708691207961,
+  "encoder_only": true,
+  "end_n_top": 5,
+  "eos_index": 1,
+  "fp16": true,
+  "gelu_activation": true,
+  "group_by_size": true,
+  "id2label": {
+    "0": "Pas de cyberharc\u00e8lement",
+    "1": "Racisme",
+    "2": "Sexisme",
+    "3": "Agression"
+  },
+  "id2lang": {
+    "0": "fr"
+  },
+  "init_std": 0.02,
+  "is_encoder": true,
+  "label2id": {
+    "Agression": 3,
+    "Pas de cyberharc\u00e8lement": 0,
+    "Racisme": 1,
+    "Sexisme": 2
+  },
+  "lang2id": {
+    "fr": 0
+  },
+  "lang_id": 0,
+  "langs": [
+    "fr"
+  ],
+  "layer_norm_eps": 1e-12,
+  "layerdrop": 0.0,
+  "lg_sampling_factor": -1,
+  "lgs": "fr",
+  "mask_index": 5,
+  "mask_token_id": 0,
+  "max_batch_size": 0,
+  "max_position_embeddings": 512,
+  "max_vocab": -1,
+  "mlm_steps": [
+    [
+      "fr",
+      null
+    ]
+  ],
+  "model_type": "flaubert",
+  "n_heads": 12,
+  "n_langs": 1,
+  "n_layers": 12,
+  "pad_index": 2,
+  "pad_token_id": 2,
+  "pre_norm": false,
+  "sample_alpha": 0,
+  "share_inout_emb": true,
+  "sinusoidal_embeddings": false,
+  "start_n_top": 5,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "first",
+  "summary_use_proj": true,
+  "tokens_per_batch": -1,
+  "transformers_version": "4.30.2",
+  "unk_index": 3,
+  "use_lang_emb": true,
+  "vocab_size": 68729,
+  "word_blank": 0,
+  "word_dropout": 0,
+  "word_keep": 0.1,
+  "word_mask": 0.8,
+  "word_mask_keep_rand": "0.8,0.1,0.1",
+  "word_pred": 0.15,
+  "word_rand": 0.1,
+  "word_shuffle": 0
+}

tf_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:745f504f44793ca36929e71584783ee18bc553e45a74cf0c0a69737b5bff91ca
+size 553167240