Graphcore
/

gptj-mnli

Text Generation

text-classification

Model card Files Files and versions Community

sofial committed on Aug 24, 2022

Commit

31b0b7e

·

1 Parent(s): 809adbb

Upload data_utils.py

Files changed (1) hide show

data_utils.py +28 -0

data_utils.py ADDED Viewed

	@@ -0,0 +1,28 @@

+# Copyright (c) 2022 Graphcore Ltd. All rights reserved.
+from typing import Dict, Any
+def form_text(example: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Given an example from the glue mnli dataset, generate a prompt version example in the format:
+        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
+    This format can be used to finetune the model as a Causal Language Model.
+
+    The prompt is stored under the 'text' key of `example`, which is modified in place and
+    returned.
+
+    Raises:
+        IndexError: if example['label'] is not 0, 1 or 2. Negative labels (e.g. a -1 placeholder
+            on an unlabelled example) are rejected instead of wrapping around to the end of the
+            class list.
+    """
+    class_names = ('entailment', 'neutral', 'contradiction')
+    hypothesis = example['hypothesis']
+    premise = example['premise']
+    label = example['label']
+    # Plain indexing would silently turn -1 into 'contradiction', so check the range explicitly.
+    if not 0 <= label < len(class_names):
+        raise IndexError(f'mnli label must be 0, 1 or 2, got {label!r}')
+    class_label = class_names[label]
+    example['text'] = (
+        f'mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>'
+    )
+    return example
+def split_text(example: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Split an example whose 'text' field has the format
+        mnli hypothesis: {hypothesis} premise: {premise} target: {class_label}<|endoftext|>
+    into the prompt used for validation and the expected class label.
+
+    The text is cut at its last space: what precedes it is stored under 'prompt_text', and what
+    follows it, with every '<|endoftext|>' marker removed, is stored under 'class_label'.
+    The example is updated in place and returned.
+    """
+    prompt, _, target = example['text'].rpartition(' ')
+    example['prompt_text'] = prompt
+    example['class_label'] = target.replace('<|endoftext|>', '')
+    return example