davebulaval committed on
Commit
4e0f879
•
1 Parent(s): 04f1736

improve processing and doc

Browse files
Files changed (2) hide show
  1. code_examples.py +29 -0
  2. meaningbert.py +20 -8
code_examples.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+
5
+ tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
6
+ scorer = AutoModelForSequenceClassification.from_pretrained("davebulaval/MeaningBERT")
7
+ scorer.eval()
8
+
9
+ documents = [
10
+ "He wanted to make them pay.",
11
+ "This sandwich looks delicious.",
12
+ "He wants to eat.",
13
+ ]
14
+ simplifications = [
15
+ "He wanted to make them pay.",
16
+ "This sandwich looks delicious.",
17
+ "Whatever, whenever, this is a sentence.",
18
+ ]
19
+
20
+ # We tokenize the text as a pair and return Pytorch Tensors
21
+ tokenize_text = tokenizer(
22
+ documents, simplifications, truncation=True, padding=True, return_tensors="pt"
23
+ )
24
+
25
+ with torch.no_grad():
26
+ # We process the text
27
+ scores = scorer(**tokenize_text)
28
+
29
+ print(scores.logits.tolist())
meaningbert.py CHANGED
@@ -24,7 +24,9 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
24
  @contextmanager
25
  def filter_logging_context():
26
  def filter_log(record):
27
- return False if "This IS expected if you are initializing" in record.msg else True
 
 
28
 
29
  logger = datasets.utils.logging.get_logger("transformers.modeling_utils")
30
  logger.addFilter(filter_log)
@@ -105,23 +107,33 @@ class MeaningBERTScore(evaluate.Metric):
105
  )
106
 
107
  def _compute(
108
- self,
109
- documents: List,
110
- simplifications: List,
111
- verbose: bool = False,
112
  ) -> Dict:
113
  assert len(documents) == len(
114
- simplifications), "The number of document is different of the number of simplifications."
 
115
  hashcode = _HASH
116
 
117
  # We load the MeaningBERT pretrained model
118
- scorer = AutoModelForSequenceClassification.from_pretrained("davebulaval/MeaningBERT")
 
 
 
119
 
120
  # We load MeaningBERT tokenizer
121
  tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
122
 
123
  # We tokenize the text as a pair and return Pytorch Tensors
124
- tokenize_text = tokenizer(documents, simplifications, truncation=True, padding=True, return_tensors="pt")
 
 
 
 
 
 
125
 
126
  with filter_logging_context():
127
  # We process the text
 
24
  @contextmanager
25
  def filter_logging_context():
26
  def filter_log(record):
27
+ return (
28
+ False if "This IS expected if you are initializing" in record.msg else True
29
+ )
30
 
31
  logger = datasets.utils.logging.get_logger("transformers.modeling_utils")
32
  logger.addFilter(filter_log)
 
107
  )
108
 
109
  def _compute(
110
+ self,
111
+ documents: List,
112
+ simplifications: List,
113
+ verbose: bool = False,
114
  ) -> Dict:
115
  assert len(documents) == len(
116
+ simplifications
117
+ ), "The number of document is different of the number of simplifications."
118
  hashcode = _HASH
119
 
120
  # We load the MeaningBERT pretrained model
121
+ scorer = AutoModelForSequenceClassification.from_pretrained(
122
+ "davebulaval/MeaningBERT"
123
+ )
124
+ scorer.eval()
125
 
126
  # We load MeaningBERT tokenizer
127
  tokenizer = AutoTokenizer.from_pretrained("davebulaval/MeaningBERT")
128
 
129
  # We tokenize the text as a pair and return Pytorch Tensors
130
+ tokenize_text = tokenizer(
131
+ documents,
132
+ simplifications,
133
+ truncation=True,
134
+ padding=True,
135
+ return_tensors="pt",
136
+ )
137
 
138
  with filter_logging_context():
139
  # We process the text