Add SetFit model

Browse files

Files changed (11) hide show

1_Pooling/config.json +1 -1
README.md +52 -55
config.json +9 -19
config_sentence_transformers.json +3 -3
model.safetensors +2 -2
model_head.pkl +2 -2
modules.json +6 -0
sentence_bert_config.json +1 -1
tokenizer.json +2 -8
tokenizer_config.json +8 -1
vocab.txt +0 -6

1_Pooling/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "word_embedding_dimension": 768,
   "pooling_mode_cls_token": false,
   "pooling_mode_mean_tokens": true,
   "pooling_mode_max_tokens": false,

 {
+  "word_embedding_dimension": 384,
   "pooling_mode_cls_token": false,
   "pooling_mode_mean_tokens": true,
   "pooling_mode_max_tokens": false,

README.md CHANGED Viewed

@@ -8,23 +8,27 @@ tags:
 metrics:
 - accuracy
 widget:
-- text: For example, we cannot conclusively rule out the possibility that the five
-    wedges represent more than five seismic slip events.
-- text: Therefore the improvement of Aceclofenac dissolution is an important issue
-    for enhancing its onset of action and therapeutic efficacy.
-- text: After removal of protists and in situ incubations in dialysis bags, members
-    of the beta I clade increased to almost 30% of total cells within 24 h. It is
-    thus likely that these bacteria contributed disproportionally to the flux of organic
-    carbon from the picoplankton to the higher trophic levels.
-- text: At the conclusion of the study, participants were asked to comment on the
-    purpose of the study.
-- text: It is therefore likely that many PEV chargers will trip in the 0.20-0.25 s
-    time frame.
 pipeline_tag: text-classification
 inference: true
-base_model: jinaai/jina-embeddings-v2-base-en
 model-index:
-- name: SetFit with jinaai/jina-embeddings-v2-base-en
   results:
   - task:
       type: text-classification
@@ -35,13 +39,13 @@ model-index:
       split: test
     metrics:
     - type: accuracy
-      value: 0.9777777777777777
       name: Accuracy
 ---
-# SetFit with jinaai/jina-embeddings-v2-base-en
-This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [jinaai/jina-embeddings-v2-base-en](https://huggingface.co/jinaai/jina-embeddings-v2-base-en) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
 The model has been trained using an efficient few-shot learning technique that involves:
@@ -52,9 +56,9 @@ The model has been trained using an efficient few-shot learning technique that i
 ### Model Description
 - **Model Type:** SetFit
-- **Sentence Transformer body:** [jinaai/jina-embeddings-v2-base-en](https://huggingface.co/jinaai/jina-embeddings-v2-base-en)
 - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
-- **Maximum Sequence Length:** 8192 tokens
 - **Number of Classes:** 9 classes
 <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
 <!-- - **Language:** Unknown -->
@@ -84,7 +88,7 @@ The model has been trained using an efficient few-shot learning technique that i
 ### Metrics
 | Label   | Accuracy |
 |:--------|:---------|
-| **all** | 0.9778   |
 ## Uses
@@ -104,7 +108,7 @@ from setfit import SetFitModel
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("Corran/Jina_Sci")
 # Run inference
-preds = model("It is therefore likely that many PEV chargers will trip in the 0.20-0.25 s time frame.")
 ```
 <!--
@@ -136,26 +140,26 @@ preds = model("It is therefore likely that many PEV chargers will trip in the 0.
 ### Training Set Metrics
 | Training set | Min | Median  | Max |
 |:-------------|:----|:--------|:----|
-| Word count   | 5   | 25.0778 | 98  |
 | Label | Training Sample Count |
 |:------|:----------------------|
-| 1     | 30                    |
-| 2     | 30                    |
-| 3     | 30                    |
-| 4     | 30                    |
-| 5     | 30                    |
-| 6     | 30                    |
-| 7     | 30                    |
-| 8     | 30                    |
-| 9     | 30                    |
 ### Training Hyperparameters
-- batch_size: (15, 15)
 - num_epochs: (1, 1)
 - max_steps: -1
 - sampling_strategy: oversampling
-- num_iterations: 30
 - body_learning_rate: (2e-05, 2e-05)
 - head_learning_rate: 2e-05
 - loss: CosineSimilarityLoss
@@ -171,28 +175,21 @@ preds = model("It is therefore likely that many PEV chargers will trip in the 0.
 ### Training Results
 | Epoch  | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
-| 0.0009 | 1    | 0.2692        | -               |
-| 0.0463 | 50   | 0.2293        | -               |
-| 0.0926 | 100  | 0.1244        | -               |
-| 0.1389 | 150  | 0.1245        | -               |
-| 0.1852 | 200  | 0.0595        | -               |
-| 0.2315 | 250  | 0.0102        | -               |
-| 0.2778 | 300  | 0.0042        | -               |
-| 0.3241 | 350  | 0.0036        | -               |
-| 0.3704 | 400  | 0.0031        | -               |
-| 0.4167 | 450  | 0.0015        | -               |
-| 0.4630 | 500  | 0.0007        | -               |
-| 0.5093 | 550  | 0.0008        | -               |
-| 0.5556 | 600  | 0.0008        | -               |
-| 0.6019 | 650  | 0.0006        | -               |
-| 0.6481 | 700  | 0.0005        | -               |
-| 0.6944 | 750  | 0.0006        | -               |
-| 0.7407 | 800  | 0.0006        | -               |
-| 0.7870 | 850  | 0.0006        | -               |
-| 0.8333 | 900  | 0.0007        | -               |
-| 0.8796 | 950  | 0.0005        | -               |
-| 0.9259 | 1000 | 0.0004        | -               |
-| 0.9722 | 1050 | 0.0003        | -               |
 ### Framework Versions
 - Python: 3.10.12

 metrics:
 - accuracy
 widget:
+- text: '6) , it is interesting to note how, going from lateral to downstream positions,
+    from 1 to 13: -charged hadrons (protons, pions, kaons) contribution rises from
+    34% to 48%; -electrons and positrons contribution rises from 30% to 40%; -muons
+    doses are stable around the 3-4%, representing an almost negligible portion of
+    the total; -photons doses decrease from 24% to 7% in terms of contribution to
+    the total; -neutrons contribution goes down from 8.5% to 2.5% in terms of contribution
+    to the total.'
+- text: the study was conducted in 2015 on adolescent undergraduate university students
+    of three fields of study -humanities, as well as medical and technical courses.
+- text: For this purpose, it was first necessary to discover the interdependencies
+    of the data attributes.
+- text: The patients included in this study were recruited from the Vascular Department
+    of West China Hospital, Sichuan University, between January 2009 and January 2011.
+- text: 1 Likewise, age at diagnosis (P < 0.001), primary site (P = 0.04), number
+    of positive nodes (P < 0.001), and depth of invasion (P < 0.001) had a significant
+    impact on disease-specific survival of the MRI patients.
 pipeline_tag: text-classification
 inference: true
+base_model: sentence-transformers/all-MiniLM-L6-v2
 model-index:
+- name: SetFit with sentence-transformers/all-MiniLM-L6-v2
   results:
   - task:
       type: text-classification
       split: test
     metrics:
     - type: accuracy
+      value: 0.9433333333333334
       name: Accuracy
 ---
+# SetFit with sentence-transformers/all-MiniLM-L6-v2
+This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification.
 The model has been trained using an efficient few-shot learning technique that involves:
 ### Model Description
 - **Model Type:** SetFit
+- **Sentence Transformer body:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2)
 - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance
+- **Maximum Sequence Length:** 256 tokens
 - **Number of Classes:** 9 classes
 <!-- - **Training Dataset:** [Unknown](https://huggingface.co/datasets/unknown) -->
 <!-- - **Language:** Unknown -->
 ### Metrics
 | Label   | Accuracy |
 |:--------|:---------|
+| **all** | 0.9433   |
 ## Uses
 # Download from the 🤗 Hub
 model = SetFitModel.from_pretrained("Corran/Jina_Sci")
 # Run inference
+preds = model("For this purpose, it was first necessary to discover the interdependencies of the data attributes.")
 ```
 <!--
 ### Training Set Metrics
 | Training set | Min | Median  | Max |
 |:-------------|:----|:--------|:----|
+| Word count   | 5   | 26.2526 | 128 |
 | Label | Training Sample Count |
 |:------|:----------------------|
+| 1     | 300                   |
+| 2     | 300                   |
+| 3     | 300                   |
+| 4     | 300                   |
+| 5     | 300                   |
+| 6     | 300                   |
+| 7     | 300                   |
+| 8     | 300                   |
+| 9     | 300                   |
 ### Training Hyperparameters
+- batch_size: (75, 75)
 - num_epochs: (1, 1)
 - max_steps: -1
 - sampling_strategy: oversampling
+- num_iterations: 10
 - body_learning_rate: (2e-05, 2e-05)
 - head_learning_rate: 2e-05
 - loss: CosineSimilarityLoss
 ### Training Results
 | Epoch  | Step | Training Loss | Validation Loss |
 |:------:|:----:|:-------------:|:---------------:|
+| 0.0014 | 1    | 0.4034        | -               |
+| 0.0694 | 50   | 0.2314        | -               |
+| 0.1389 | 100  | 0.1816        | -               |
+| 0.2083 | 150  | 0.1708        | -               |
+| 0.2778 | 200  | 0.1079        | -               |
+| 0.3472 | 250  | 0.1407        | -               |
+| 0.4167 | 300  | 0.0788        | -               |
+| 0.4861 | 350  | 0.0565        | -               |
+| 0.5556 | 400  | 0.0651        | -               |
+| 0.625  | 450  | 0.0402        | -               |
+| 0.6944 | 500  | 0.0468        | -               |
+| 0.7639 | 550  | 0.055         | -               |
+| 0.8333 | 600  | 0.0473        | -               |
+| 0.9028 | 650  | 0.0605        | -               |
+| 0.9722 | 700  | 0.03          | -               |
 ### Framework Versions
 - Python: 3.10.12

config.json CHANGED Viewed

@@ -1,36 +1,26 @@
 {
-  "_name_or_path": "/root/.cache/torch/sentence_transformers/jinaai_jina-embeddings-v2-base-en/",
   "architectures": [
-    "JinaBertModel"
   ],
-  "attention_probs_dropout_prob": 0.0,
-  "attn_implementation": "torch",
-  "auto_map": {
-    "AutoConfig": "configuration_bert.JinaBertConfig",
-    "AutoModel": "modeling_bert.JinaBertModel",
-    "AutoModelForMaskedLM": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForMaskedLM",
-    "AutoModelForSequenceClassification": "jinaai/jina-bert-implementation--modeling_bert.JinaBertForSequenceClassification"
-  },
   "classifier_dropout": null,
-  "emb_pooler": "mean",
-  "feed_forward_type": "geglu",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "layer_norm_eps": 1e-12,
-  "max_position_embeddings": 8192,
-  "model_max_length": 8192,
   "model_type": "bert",
   "num_attention_heads": 12,
-  "num_hidden_layers": 12,
   "pad_token_id": 0,
-  "position_embedding_type": "alibi",
   "torch_dtype": "float32",
   "transformers_version": "4.35.2",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 30528
 }

 {
+  "_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_all-MiniLM-L6-v2/",
   "architectures": [
+    "BertModel"
   ],
+  "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
   "initializer_range": 0.02,
+  "intermediate_size": 1536,
   "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
   "model_type": "bert",
   "num_attention_heads": 12,
+  "num_hidden_layers": 6,
   "pad_token_id": 0,
+  "position_embedding_type": "absolute",
   "torch_dtype": "float32",
   "transformers_version": "4.35.2",
   "type_vocab_size": 2,
   "use_cache": true,
+  "vocab_size": 30522
 }

config_sentence_transformers.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "__version__": {
-    "sentence_transformers": "2.2.2",
-    "transformers": "4.31.0",
-    "pytorch": "2.0.1"
   }
 }

 {
   "__version__": {
+    "sentence_transformers": "2.0.0",
+    "transformers": "4.6.1",
+    "pytorch": "1.8.1"
   }
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c49986bb9f2e7acb06240510be0e6957916666252e860c2bb48c0f46936a6777
-size 549493968

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cd93b873f934fbe3a1d10049c161f170826c92f8a75494c1691c4e1f3e9806e
+size 90864192

model_head.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:edae54e5c1d2e81a5cae6d54fd1d1da53917803f1306d72d495ba3fb698d6e7e
-size 56271

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0850ce8fb35582a08b67fcbbf3b3dc95f9aa97fdc04ba4004288df94a163c2f
+size 28623

modules.json CHANGED Viewed

@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
   }
 ]

     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 8192,
   "do_lower_case": false
 }

 {
+  "max_seq_length": 256,
   "do_lower_case": false
 }

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 8192,
     "strategy": "LongestFirst",
     "stride": 0
   },
@@ -30678,13 +30678,7 @@
       "##／": 30518,
       "##：": 30519,
       "##？": 30520,
-      "##～": 30521,
-      "bowang": 30522,
-      "georgiosmastrapas": 30523,
-      "jackminong": 30524,
-      "alaeddineabdessalem": 30525,
-      "isabellemohr": 30526,
-      "michaelguenther": 30527
     }
   }
 }

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },
       "##／": 30518,
       "##：": 30519,
       "##？": 30520,
+      "##～": 30521
     }
   }
 }

tokenizer_config.json CHANGED Viewed

@@ -46,12 +46,19 @@
   "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "model_max_length": 2147483648,
   "never_split": null,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   "do_basic_tokenize": true,
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "max_length": 128,
+  "model_max_length": 512,
   "never_split": null,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

vocab.txt CHANGED Viewed

@@ -30520,9 +30520,3 @@ necessitated
 ##：
 ##？
 ##～
-bowang
-georgiosmastrapas
-jackminong
-alaeddineabdessalem
-isabellemohr
-michaelguenther

 ##：
 ##？
 ##～