stephenhib committed
Commit f1ea1c2 · verified · 1 Parent(s): b09b696

Add new SentenceTransformer model

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 768,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": true
+ }
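This configuration enables attention-masked mean pooling over the 768-dimensional MPNet token embeddings (all other pooling modes are off). As a rough illustration of what the `Pooling` module computes — a minimal PyTorch sketch, not the library's internal code:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average token embeddings over non-padding positions.

    token_embeddings: (batch, seq_len, 768); attention_mask: (batch, seq_len) of 0/1.
    """
    mask = attention_mask.unsqueeze(-1).float()       # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)     # (batch, 768)
    counts = mask.sum(dim=1).clamp(min=1e-9)          # avoid dividing by zero on empty rows
    return summed / counts

# Toy check: the second sequence is padded after two tokens.
embeddings = torch.randn(2, 4, 768)
mask = torch.tensor([[1, 1, 1, 1], [1, 1, 0, 0]])
print(mean_pool(embeddings, mask).shape)  # torch.Size([2, 768])
```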
README.md ADDED
@@ -0,0 +1,536 @@
+ ---
+ base_model: sentence-transformers/all-mpnet-base-v2
+ library_name: sentence-transformers
+ metrics:
+ - cosine_accuracy@1
+ - cosine_accuracy@3
+ - cosine_accuracy@5
+ - cosine_accuracy@10
+ - cosine_precision@1
+ - cosine_precision@3
+ - cosine_precision@5
+ - cosine_precision@10
+ - cosine_recall@1
+ - cosine_recall@3
+ - cosine_recall@5
+ - cosine_recall@10
+ - cosine_ndcg@10
+ - cosine_mrr@10
+ - cosine_map@100
+ pipeline_tag: sentence-similarity
+ tags:
+ - sentence-transformers
+ - sentence-similarity
+ - feature-extraction
+ - generated_from_trainer
+ - dataset_size:807656
+ - loss:MultipleNegativesRankingLoss
+ widget:
+ - source_sentence: '<p id="pa01" num="0001">An decoding method according to an embodiment
+     includes a deriving step and an decoding step. The deriving step derives a first
+     reference value that is a reference value of a weighting factor based on fixed
+     point precision representing roughness of the weighting factor that is used for
+     making a motion-compensated prediction of a change in a pixel value by multiplying
+     a reference image by the weighting factor. The decoding step decodes a first difference
+     value that is a difference value between the weighting factor and the first reference
+     value. The weighting factor is included in a range of predetermined bit precision
+     having the first reference value at approximate center.
+
+     <img id="iaf01" file="imgaf001.tif" wi="146" he="85" img-content="drawing" img-format="tif"/></p>'
+   sentences:
+   - DECODING METHOD AND DECODING DEVICE
+   - METHOD FOR DETERMINING SEMI-SYNCHRONOUS EXPOSURE PARAMETERS AND ELECTRONIC DEVICE
+   - HOISTING ROPE MONITORING DEVICE
+ - source_sentence: <p id="pa01" num="0001">A layered sheet 10 includes a substrate
+     layer 1, and surface layers 2 and 3 configured to be layered on at least one surface
+     of the substrate layer 1. The substrate layer 1 contains a first thermoplastic
+     resin and inorganic fillers. The surface layers 2 and 3 contain a second thermoplastic
+     resin and a conductive material. A content of the inorganic fillers in the substrate
+     layer 1 is 0.3 to 28 mass% based on a total amount of the substrate layer.<img
+     id="iaf01" file="imgaf001.tif" wi="86" he="70" img-content="drawing" img-format="tif"/><img
+     id="iaf02" file="imgaf002.tif" wi="165" he="117" img-content="drawing" img-format="tif"/></p>
+   sentences:
+   - LAYERED SHEET, CONTAINER, CARRIER TAPE, AND ELECTRONIC COMPONENT PACKAGING BODY
+   - BLOCK COPOLYMERS FOR GEL COMPOSITIONS WITH IMPROVED EFFICIENCY
+   - AN INDICATOR SYSTEM FOR A PERISHABLE PRODUCT CONTAINER
+ - source_sentence: '<p id="pa01" num="0001">A method for manufacturing a gear which
+     effectively prevent a crack from occurring inside a tooth part when rolling processing
+     is performed on a teeth part of a gear raw material is achieved. A method according
+     to one embodiment for manufacturing a gear 15 by performing rolling processing
+     on a tooth part 2a of a sintered gear raw material 2. The method includes, when
+     the rolling processing is performed on the tooth part 2a of the gear raw material
+     2, pressing the gear raw material 2 toward a center of rotation of the gear raw
+     material 2 by a rolling machine 4 and, when at least the rolling processing is
+     performed on the tooth part 2a of the gear raw material 2 toward a center of a
+     thickness thereof by a pressing machine 5, pressing a region A where an internal
+     density of the tooth part 2a of the gear raw material 2 decreases.</p><p id="pa02"
+     num="0002">The invention also relates to an apparatus for manufacturing a gear.
+
+     <img id="iaf01" file="imgaf001.tif" wi="106" he="68" img-content="drawing" img-format="tif"/></p>'
+   sentences:
+   - COMMUNICATION METHOD, RELATED APPARATUS AND DEVICE AND COMPUTER-READABLE STORAGE
+     MEDIUM
+   - METHOD AND APPARATUS FOR MANUFACTURING GEAR
+   - IMPLANTABLE MEDICAL DEVICE AND METHOD OF PROVIDING WIRE CONNECTIONS FOR IT
+ - source_sentence: '<p id="pa01" num="0001">This application discloses a data reading
+     method, apparatus, and system, and a distributed system, and belongs to the field
+     of storage technologies. The method includes: receiving a data read request sent
+     by a terminal, where the data read request includes a logical address of target
+     data; locally searching, based on the logical address, a first slave node for
+     a latest version of the target data; and when it is determined that the latest
+     version of the target data has been stored in each of a plurality of slave nodes,
+     sending the latest version of the target data to the terminal. This application
+     can avoid a rollback of a version of read data, and this application applies to
+     data reading.<img id="iaf01" file="imgaf001.tif" wi="62" he="86" img-content="drawing"
+     img-format="tif"/><img id="iaf02" file="imgaf002.tif" wi="155" he="233" img-content="drawing"
+     img-format="tif"/></p>'
+   sentences:
+   - SLIDING MECHANISM AND TERMINAL DEVICE PROVIDED WITH SAME
+   - PRESSURE-APPLYING DEVICE FOR A SWITCHING MODULE AND METHOD OF CHANGING A SWITCHING
+     MODULE USING THE SAME
+   - DATA READING METHOD, DEVICE, SYSTEM, AND DISTRIBUTED SYSTEM
+ - source_sentence: '<p id="pa01" num="0001">An application apparatus (100) includes:
+     an application needle (24) that applies, to a target, an application material
+     having its viscosity changing under shear; a drive unit (90) that moves the application
+     needle (24) up and down; and a controller (80) that controls the drive unit (90)
+     to move the application needle such that shear is applied to the application material
+     at a shear speed depending on a type of the application material and depending
+     on a target application amount or a target application diameter.<img id="iaf01"
+     file="imgaf001.tif" wi="78" he="56" img-content="drawing" img-format="tif"/></p>'
+   sentences:
+   - HEAT PROCESSING DEVICE
+   - Electric motor
+   - COATING APPARATUS AND COATING METHOD
+ model-index:
+ - name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
+   results:
+   - task:
+       type: information-retrieval
+       name: Information Retrieval
+     dataset:
+       name: sentence transformers/all mpnet base v2
+       type: sentence-transformers/all-mpnet-base-v2
+     metrics:
+     - type: cosine_accuracy@1
+       value: 0.592
+       name: Cosine Accuracy@1
+     - type: cosine_accuracy@3
+       value: 0.711
+       name: Cosine Accuracy@3
+     - type: cosine_accuracy@5
+       value: 0.751
+       name: Cosine Accuracy@5
+     - type: cosine_accuracy@10
+       value: 0.814
+       name: Cosine Accuracy@10
+     - type: cosine_precision@1
+       value: 0.592
+       name: Cosine Precision@1
+     - type: cosine_precision@3
+       value: 0.237
+       name: Cosine Precision@3
+     - type: cosine_precision@5
+       value: 0.1502
+       name: Cosine Precision@5
+     - type: cosine_precision@10
+       value: 0.0814
+       name: Cosine Precision@10
+     - type: cosine_recall@1
+       value: 0.592
+       name: Cosine Recall@1
+     - type: cosine_recall@3
+       value: 0.711
+       name: Cosine Recall@3
+     - type: cosine_recall@5
+       value: 0.751
+       name: Cosine Recall@5
+     - type: cosine_recall@10
+       value: 0.814
+       name: Cosine Recall@10
+     - type: cosine_ndcg@10
+       value: 0.6987639783179386
+       name: Cosine Ndcg@10
+     - type: cosine_mrr@10
+       value: 0.6624964285714287
+       name: Cosine Mrr@10
+     - type: cosine_map@100
+       value: 0.6665468875517868
+       name: Cosine Map@100
+ ---
+
+ # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
+
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+ ## Model Details
+
+ ### Model Description
+ - **Model Type:** Sentence Transformer
+ - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision f1b1b820e405bb8644f5e8d9a3b98f9c9e0a3c58 -->
+ - **Maximum Sequence Length:** 384 tokens
+ - **Output Dimensionality:** 768 dimensions
+ - **Similarity Function:** Cosine Similarity
+ - **Training Dataset:**
+     - json
+ <!-- - **Language:** Unknown -->
+ <!-- - **License:** Unknown -->
+
+ ### Model Sources
+
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+ ### Full Model Architecture
+
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel
+   (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+   (2): Normalize()
+ )
+ ```
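The trailing `Normalize()` module L2-normalizes every embedding, so cosine similarity and dot product produce the same ranking. A quick check of that property — a sketch, assuming the repo id from the usage section below:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")
embedding = model.encode(["A short test sentence."])[0]
print(np.linalg.norm(embedding))  # ~1.0, because the last module L2-normalizes outputs
```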
+
+ ## Usage
+
+ ### Direct Usage (Sentence Transformers)
+
+ First install the Sentence Transformers library:
+
+ ```bash
+ pip install -U sentence-transformers
+ ```
+
+ Then you can load this model and run inference.
+ ```python
+ from sentence_transformers import SentenceTransformer
+
+ # Download from the 🤗 Hub
+ model = SentenceTransformer("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")
+ # Run inference
+ sentences = [
+     '<p id="pa01" num="0001">An application apparatus (100) includes: an application needle (24) that applies, to a target, an application material having its viscosity changing under shear; a drive unit (90) that moves the application needle (24) up and down; and a controller (80) that controls the drive unit (90) to move the application needle such that shear is applied to the application material at a shear speed depending on a type of the application material and depending on a target application amount or a target application diameter.<img id="iaf01" file="imgaf001.tif" wi="78" he="56" img-content="drawing" img-format="tif"/></p>',
+     'COATING APPARATUS AND COATING METHOD',
+     'Electric motor',
+ ]
+ embeddings = model.encode(sentences)
+ print(embeddings.shape)
+ # [3, 768]
+
+ # Get the similarity scores for the embeddings
+ similarities = model.similarity(embeddings, embeddings)
+ print(similarities.shape)
+ # [3, 3]
+ ```
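Because the model was trained on patent abstract/title pairs, a typical application is retrieving the best-matching titles for an abstract. A sketch using `sentence_transformers.util.semantic_search`; the corpus below is a made-up three-entry example, not a real index:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")

# Hypothetical mini-corpus of patent titles
corpus = [
    "COATING APPARATUS AND COATING METHOD",
    "Electric motor",
    "DATA READING METHOD, DEVICE, SYSTEM, AND DISTRIBUTED SYSTEM",
]
query = "An application needle applies a shear-thinning material to a target."

corpus_embeddings = model.encode(corpus, convert_to_tensor=True)
query_embedding = model.encode(query, convert_to_tensor=True)

# Rank the corpus by cosine similarity and keep the top 2 hits
hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=2)[0]
for hit in hits:
    print(corpus[hit["corpus_id"]], round(hit["score"], 3))
```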
+
+ <!--
+ ### Direct Usage (Transformers)
+
+ <details><summary>Click to see the direct usage in Transformers</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Downstream Usage (Sentence Transformers)
+
+ You can finetune this model on your own dataset.
+
+ <details><summary>Click to expand</summary>
+
+ </details>
+ -->
+
+ <!--
+ ### Out-of-Scope Use
+
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
+ -->
+
+ ## Evaluation
+
+ ### Metrics
+
+ #### Information Retrieval
+ * Dataset: `sentence-transformers/all-mpnet-base-v2`
+ * Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
+
+ | Metric              | Value      |
+ |:--------------------|:-----------|
+ | cosine_accuracy@1   | 0.592      |
+ | cosine_accuracy@3   | 0.711      |
+ | cosine_accuracy@5   | 0.751      |
+ | cosine_accuracy@10  | 0.814      |
+ | cosine_precision@1  | 0.592      |
+ | cosine_precision@3  | 0.237      |
+ | cosine_precision@5  | 0.1502     |
+ | cosine_precision@10 | 0.0814     |
+ | cosine_recall@1     | 0.592      |
+ | cosine_recall@3     | 0.711      |
+ | cosine_recall@5     | 0.751      |
+ | cosine_recall@10    | 0.814      |
+ | cosine_ndcg@10      | 0.6988     |
+ | cosine_mrr@10       | 0.6625     |
+ | **cosine_map@100**  | **0.6665** |
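The @1 accuracy, precision, and recall coincide (0.592) because each query has exactly one relevant document, and recall@k equals accuracy@k for the same reason. A minimal sketch of how such an evaluation is wired up with `InformationRetrievalEvaluator` — the queries and corpus here are placeholders, not the actual evaluation split:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator

model = SentenceTransformer("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")

queries = {"q1": "Abstract text of a patent ..."}                  # placeholder abstract
corpus = {"d1": "DECODING METHOD AND DECODING DEVICE",             # placeholder titles
          "d2": "HOISTING ROPE MONITORING DEVICE"}
relevant_docs = {"q1": {"d1"}}                                     # one relevant title per query

evaluator = InformationRetrievalEvaluator(queries, corpus, relevant_docs, name="patabs-dev")
results = evaluator(model)  # dict with accuracy@k, precision@k, recall@k, ndcg@10, mrr@10, map@100
print(results)
```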
+
+ <!--
+ ## Bias, Risks and Limitations
+
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+ -->
+
+ <!--
+ ### Recommendations
+
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+ -->
+
+ ## Training Details
+
+ ### Training Dataset
+
+ #### json
+
+ * Dataset: json
+ * Size: 807,656 training samples
+ * Columns: <code>positive</code> and <code>anchor</code>
+ * Approximate statistics based on the first 1000 samples:
+   |         | positive | anchor |
+   |:--------|:---------|:-------|
+   | type    | string   | string |
+   | details | <ul><li>min: 45 tokens</li><li>mean: 237.14 tokens</li><li>max: 384 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 12.34 tokens</li><li>max: 101 tokens</li></ul> |
+ * Samples:
+   | positive | anchor |
+   |:---------|:-------|
+   | <code><p id="pa01" num="0001">The invention relates to an image fusion method and device, which includes: obtaining a first short-focus image and a first long-focus image acquired by a short-focus sensor and a long-focus sensor at the same time; according to the focal lengths of a short-focus lens and a long-focus lens, calculating a reduction coefficient corresponding to the first long-focus image when the sizes of the same target in the first long-focus image and the first short-focus image are matched; performing a reduction processing on the first long-focus image according to the reduction coefficient to obtain a second long-focus image; according to a relative angle of the current long-focus lens and short-focus lens, calculating a position of the second long-focus image in the first short-focus image when the positions of the same target in the second long-focus image and the first short-focus image are matched; and according to the position of the second long-focus image in the first short-focus image, covering the first short-focus image by the second long-focus image to obtain a fused image. According to embodiments of the present application, on the premise of considering both the monitoring range and the definition, the monitoring cost is reduced, and the monitoring efficiency is improved.<img id="iaf01" file="imgaf001.tif" wi="92" he="72" img-content="drawing" img-format="tif"/></p></code> | <code>IMAGE FUSION METHOD AND DEVICE</code> |
+   | <code><p id="pa01" num="0001">The present invention discloses an <i>ex vivo</i> method for the diagnostic and/or prognostic assessment of the acute-on-chronic liver failure (ACLF) syndrome in a patient with a liver disorder characterized in that it comprises the steps of: (a) measuring a panel of metabolites related with acylcarnitines-sialic acid-acetylated amino acids and/or sugar alcohols and derivatives-tryptophan metabolism-catecholamines derivatives in a biological sample of said patient; and (b) comparing the level of said metabolites in the sample with the level of said metabolites in healthy patients; and wherein an increase of at least 1.2 times of the level of said metabolites is indicative of ACLF syndrome.</p></code> | <code>METHOD FOR THE DIAGNOSTIC AND/OR PROGNOSTIC ASSESSMENT OF ACUTE-ON-CHRONIC LIVER FAILURE SYNDROME IN PATIENTS WITH LIVER DISORDERS</code> |
+   | <code><p id="pa01" num="0001">A valve housing receives a spool 34 and the spool has a regulating chamber 52 selectively communicating a supply line to a return line. The spool 34 is biased in one direction by a spring force and there is a second force biasing the spool in an opposed direction whith the second bias force being provided by a fluid pressure within a hydraulic system associated which the pressure regulating valve. The amount of communication between the supply port 111 and the return port 99 is regulated by a position of the spool 34 as the bias force from the fluid pressure change. Damper chambers are provided on opposed sides of the spool and serve to dampen a speed of movement of the spool and a supply line for supplying fluid into the damper chambers through check valves 44, 64. The supply line serves to assist in purging air outwardly of the damper chambers.<br><img id="iaf01" file="imgaf001.tif" wi="142" he="100" img-content="drawing" img-format="tif"/></p></code> | <code>Air purging pressure regulating valve</code> |
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+   ```json
+   {
+       "scale": 20.0,
+       "similarity_fct": "cos_sim"
+   }
+   ```
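`MultipleNegativesRankingLoss` scores each anchor against its own positive and treats every other positive in the batch as a negative, so the effective number of negatives grows with the batch size. With the parameters above, the loss can be instantiated like this (sketch):

```python
from sentence_transformers import SentenceTransformer, util
from sentence_transformers.losses import MultipleNegativesRankingLoss

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")
# scale=20.0 multiplies the cosine similarities before the softmax/cross-entropy step
loss = MultipleNegativesRankingLoss(model, scale=20.0, similarity_fct=util.cos_sim)
```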
+
+ ### Training Hyperparameters
+ #### Non-Default Hyperparameters
+
+ - `eval_strategy`: steps
+ - `per_device_train_batch_size`: 4
+ - `per_device_eval_batch_size`: 2
+ - `learning_rate`: 2e-05
+ - `num_train_epochs`: 1
+ - `warmup_ratio`: 0.1
+ - `bf16`: True
+ - `batch_sampler`: no_duplicates
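A sketch of how a comparable run can be launched with these non-default values in Sentence Transformers 3.x; the one-row dataset stands in for the 807,656-pair JSON file, which is not part of this repository:

```python
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import MultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers

model = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

# Schematic stand-in for the real training data (columns as in the card: positive, anchor)
train_dataset = Dataset.from_dict({
    "positive": ['<p id="pa01" num="0001">An abstract ...</p>'],
    "anchor": ["A PLACEHOLDER PATENT TITLE"],
})

args = SentenceTransformerTrainingArguments(
    output_dir="all-mpnet-base-v2-patabs",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2,
    learning_rate=2e-5,
    warmup_ratio=0.1,
    bf16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # avoid duplicate in-batch negatives
    # eval_strategy="steps" was also set; it additionally requires an eval dataset or evaluator
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=MultipleNegativesRankingLoss(model),
)
trainer.train()
```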
+
+ #### All Hyperparameters
+ <details><summary>Click to expand</summary>
+
+ - `overwrite_output_dir`: False
+ - `do_predict`: False
+ - `eval_strategy`: steps
+ - `prediction_loss_only`: True
+ - `per_device_train_batch_size`: 4
+ - `per_device_eval_batch_size`: 2
+ - `per_gpu_train_batch_size`: None
+ - `per_gpu_eval_batch_size`: None
+ - `gradient_accumulation_steps`: 1
+ - `eval_accumulation_steps`: None
+ - `torch_empty_cache_steps`: None
+ - `learning_rate`: 2e-05
+ - `weight_decay`: 0.0
+ - `adam_beta1`: 0.9
+ - `adam_beta2`: 0.999
+ - `adam_epsilon`: 1e-08
+ - `max_grad_norm`: 1.0
+ - `num_train_epochs`: 1
+ - `max_steps`: -1
+ - `lr_scheduler_type`: linear
+ - `lr_scheduler_kwargs`: {}
+ - `warmup_ratio`: 0.1
+ - `warmup_steps`: 0
+ - `log_level`: passive
+ - `log_level_replica`: warning
+ - `log_on_each_node`: True
+ - `logging_nan_inf_filter`: True
+ - `save_safetensors`: True
+ - `save_on_each_node`: False
+ - `save_only_model`: False
+ - `restore_callback_states_from_checkpoint`: False
+ - `no_cuda`: False
+ - `use_cpu`: False
+ - `use_mps_device`: False
+ - `seed`: 42
+ - `data_seed`: None
+ - `jit_mode_eval`: False
+ - `use_ipex`: False
+ - `bf16`: True
+ - `fp16`: False
+ - `fp16_opt_level`: O1
+ - `half_precision_backend`: auto
+ - `bf16_full_eval`: False
+ - `fp16_full_eval`: False
+ - `tf32`: None
+ - `local_rank`: 0
+ - `ddp_backend`: None
+ - `tpu_num_cores`: None
+ - `tpu_metrics_debug`: False
+ - `debug`: []
+ - `dataloader_drop_last`: False
+ - `dataloader_num_workers`: 0
+ - `dataloader_prefetch_factor`: None
+ - `past_index`: -1
+ - `disable_tqdm`: False
+ - `remove_unused_columns`: True
+ - `label_names`: None
+ - `load_best_model_at_end`: False
+ - `ignore_data_skip`: False
+ - `fsdp`: []
+ - `fsdp_min_num_params`: 0
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+ - `fsdp_transformer_layer_cls_to_wrap`: None
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+ - `deepspeed`: None
+ - `label_smoothing_factor`: 0.0
+ - `optim`: adamw_torch
+ - `optim_args`: None
+ - `adafactor`: False
+ - `group_by_length`: False
+ - `length_column_name`: length
+ - `ddp_find_unused_parameters`: None
+ - `ddp_bucket_cap_mb`: None
+ - `ddp_broadcast_buffers`: False
+ - `dataloader_pin_memory`: True
+ - `dataloader_persistent_workers`: False
+ - `skip_memory_metrics`: True
+ - `use_legacy_prediction_loop`: False
+ - `push_to_hub`: False
+ - `resume_from_checkpoint`: None
+ - `hub_model_id`: None
+ - `hub_strategy`: every_save
+ - `hub_private_repo`: False
+ - `hub_always_push`: False
+ - `gradient_checkpointing`: False
+ - `gradient_checkpointing_kwargs`: None
+ - `include_inputs_for_metrics`: False
+ - `eval_do_concat_batches`: True
+ - `fp16_backend`: auto
+ - `push_to_hub_model_id`: None
+ - `push_to_hub_organization`: None
+ - `mp_parameters`: 
+ - `auto_find_batch_size`: False
+ - `full_determinism`: False
+ - `torchdynamo`: None
+ - `ray_scope`: last
+ - `ddp_timeout`: 1800
+ - `torch_compile`: False
+ - `torch_compile_backend`: None
+ - `torch_compile_mode`: None
+ - `dispatch_batches`: None
+ - `split_batches`: None
+ - `include_tokens_per_second`: False
+ - `include_num_input_tokens_seen`: False
+ - `neftune_noise_alpha`: None
+ - `optim_target_modules`: None
+ - `batch_eval_metrics`: False
+ - `eval_on_start`: False
+ - `use_liger_kernel`: False
+ - `eval_use_gather_object`: False
+ - `batch_sampler`: no_duplicates
+ - `multi_dataset_batch_sampler`: proportional
+
+ </details>
+
+ ### Training Logs
+ | Epoch | Step | Training Loss | sentence-transformers/all-mpnet-base-v2_cosine_map@100 |
+ |:-----:|:----:|:-------------:|:------------------------------------------------------:|
+ | 0.032 | 100  | 0.1433        | 0.6217                                                  |
+ | 0.064 | 200  | 0.0953        | 0.6447                                                  |
+ | 0.096 | 300  | 0.1084        | 0.6612                                                  |
+ | 0.128 | 400  | 0.0817        | 0.6546                                                  |
+ | 0.16  | 500  | 0.0768        | 0.6512                                                  |
+ | 0.192 | 600  | 0.0779        | 0.6466                                                  |
+ | 0.224 | 700  | 0.0709        | 0.6594                                                  |
+ | 0.256 | 800  | 0.0813        | 0.6441                                                  |
+ | 0.288 | 900  | 0.0597        | 0.6454                                                  |
+ | 0.32  | 1000 | 0.0744        | 0.6496                                                  |
+ | 0.352 | 1100 | 0.0669        | 0.6608                                                  |
+ | 0.384 | 1200 | 0.0657        | 0.6566                                                  |
+ | 0.416 | 1300 | 0.0489        | 0.6660                                                  |
+ | 0.448 | 1400 | 0.0643        | 0.6597                                                  |
+ | 0.48  | 1500 | 0.0593        | 0.6587                                                  |
+ | 0.512 | 1600 | 0.0598        | 0.6613                                                  |
+ | 0.544 | 1700 | 0.0737        | 0.6570                                                  |
+ | 0.576 | 1800 | 0.0661        | 0.6655                                                  |
+ | 0.608 | 1900 | 0.0499        | 0.6613                                                  |
+ | 0.64  | 2000 | 0.0641        | 0.6616                                                  |
+ | 0.672 | 2100 | 0.0679        | 0.6662                                                  |
+ | 0.704 | 2200 | 0.0521        | 0.6715                                                  |
+ | 0.736 | 2300 | 0.0569        | 0.6651                                                  |
+ | 0.768 | 2400 | 0.0507        | 0.6679                                                  |
+ | 0.8   | 2500 | 0.0405        | 0.6678                                                  |
+ | 0.832 | 2600 | 0.0548        | 0.6690                                                  |
+ | 0.864 | 2700 | 0.0403        | 0.6692                                                  |
+ | 0.896 | 2800 | 0.0613        | 0.6649                                                  |
+ | 0.928 | 2900 | 0.0485        | 0.6673                                                  |
+ | 0.96  | 3000 | 0.0495        | 0.6674                                                  |
+ | 0.992 | 3100 | 0.0546        | 0.6665                                                  |
+
+ ### Framework Versions
+ - Python: 3.11.9
+ - Sentence Transformers: 3.2.1
+ - Transformers: 4.45.2
+ - PyTorch: 2.3.1.post300
+ - Accelerate: 1.0.1
+ - Datasets: 3.0.1
+ - Tokenizers: 0.20.1
+
+ ## Citation
+
+ ### BibTeX
+
+ #### Sentence Transformers
+ ```bibtex
+ @inproceedings{reimers-2019-sentence-bert,
+     title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+     author = "Reimers, Nils and Gurevych, Iryna",
+     booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+     month = "11",
+     year = "2019",
+     publisher = "Association for Computational Linguistics",
+     url = "https://arxiv.org/abs/1908.10084",
+ }
+ ```
+
+ #### MultipleNegativesRankingLoss
+ ```bibtex
+ @misc{henderson2017efficient,
+     title={Efficient Natural Language Response Suggestion for Smart Reply},
+     author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+     year={2017},
+     eprint={1705.00652},
+     archivePrefix={arXiv},
+     primaryClass={cs.CL}
+ }
+ ```
+
+ <!--
+ ## Glossary
+
+ *Clearly define terms in order to be accessible across audiences.*
+ -->
+
+ <!--
+ ## Model Card Authors
+
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+ -->
+
+ <!--
+ ## Model Card Contact
+
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+ -->
config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
+   "architectures": [
+     "MPNetModel"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "mpnet",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "relative_attention_num_buckets": 32,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.2",
+   "vocab_size": 30527
+ }
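This is the unmodified MPNet backbone configuration (12 layers, hidden size 768, 12 attention heads). If needed, it can be inspected with plain `transformers` — a sketch, assuming the repo id used above:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")
print(config.model_type, config.hidden_size, config.num_hidden_layers)  # mpnet 768 12
```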
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "__version__": {
+     "sentence_transformers": "3.2.1",
+     "transformers": "4.45.2",
+     "pytorch": "2.3.1.post300"
+   },
+   "prompts": {},
+   "default_prompt_name": null,
+   "similarity_fn_name": null
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33358ac4bf89a8052e2964afdf4e0cfc3811c1976118ac3e384dc5240d8c99be
+ size 437967672
modules.json ADDED
@@ -0,0 +1,20 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   },
+   {
+     "idx": 2,
+     "name": "2",
+     "path": "2_Normalize",
+     "type": "sentence_transformers.models.Normalize"
+   }
+ ]
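`modules.json` chains the three modules in order: transformer encoder, mean pooling, L2 normalization. Loading the repo id rebuilds this pipeline automatically; an equivalent manual assembly — a sketch of what the file encodes, not a required step — would be:

```python
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer("sentence-transformers/all-mpnet-base-v2", max_seq_length=384)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="mean")
normalize = models.Normalize()

# Mirrors the three entries of modules.json
model = SentenceTransformer(modules=[transformer, pooling, normalize])
print(model)
```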
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 384,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "[UNK]",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,72 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "104": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "30526": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "<s>",
+   "do_lower_case": true,
+   "eos_token": "</s>",
+   "mask_token": "<mask>",
+   "max_length": 128,
+   "model_max_length": 384,
+   "pad_to_multiple_of": null,
+   "pad_token": "<pad>",
+   "pad_token_type_id": 0,
+   "padding_side": "right",
+   "sep_token": "</s>",
+   "stride": 0,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "MPNetTokenizer",
+   "truncation_side": "right",
+   "truncation_strategy": "longest_first",
+   "unk_token": "[UNK]"
+ }
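With `model_max_length: 384` and `truncation_side: right`, inputs longer than the limit are cut from the end, so only roughly the first 384 tokens of a long abstract contribute to the embedding. A quick check — a sketch, assuming the repo id used above:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("stephenhib/all-mpnet-base-v2-patabs-1epoc-batch32-100000")
encoded = tokenizer("word " * 1000, truncation=True)
print(tokenizer.model_max_length, len(encoded["input_ids"]))  # 384 384
```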
vocab.txt ADDED
The diff for this file is too large to render. See raw diff