Spaces:
Sleeping
Sleeping
ronald
committed on
Commit
·
d7885fe
1
Parent(s):
9609d1c
coh mech
Browse files
- ccl_win.py +23 -6
ccl_win.py
CHANGED
@@ -21,6 +21,8 @@ import getpass
|
|
21 |
import pdb
|
22 |
import os
|
23 |
import torch
|
|
|
|
|
24 |
|
25 |
# TODO: Add BibTeX citation
|
26 |
_CITATION = """\
|
@@ -113,7 +115,7 @@ class ccl_win(evaluate.Measurement):
|
|
113 |
|
114 |
|
115 |
|
116 |
-
def _compute(self, predictions, dataset, batch_size: int = 16, device=None):
|
117 |
"""Returns the scores"""
|
118 |
MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
|
119 |
BASEDIR = "/gfs/team/nlp/users/rcardena/tools/new_evals/ccl_win"
|
@@ -129,6 +131,12 @@ class ccl_win(evaluate.Measurement):
|
|
129 |
else:
|
130 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
tokenizer = AutoTokenizer.from_pretrained("roberta-large")
|
133 |
|
134 |
model = AutoModelForSequenceClassification.from_pretrained(os.path.join(BASEDIR,dataset))
|
@@ -148,12 +156,21 @@ class ccl_win(evaluate.Measurement):
|
|
148 |
probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
|
149 |
scores.extend(probs[:,0].tolist())
|
150 |
#
|
151 |
-
|
152 |
offset = 0
|
153 |
for _len in len_by_sample:
|
154 |
-
|
|
|
|
|
|
|
|
|
155 |
offset += _len
|
156 |
#
|
157 |
-
|
158 |
-
|
159 |
-
|
|
|
|
|
|
|
|
|
|
|
|
21 |
import pdb
|
22 |
import os
|
23 |
import torch
|
24 |
+
from rouge_score import scoring
|
25 |
+
|
26 |
|
27 |
# TODO: Add BibTeX citation
|
28 |
_CITATION = """\
|
|
|
115 |
|
116 |
|
117 |
|
118 |
+
def _compute(self, predictions, dataset, batch_size: int = 16, device=None, use_aggregator=True):
|
119 |
"""Returns the scores"""
|
120 |
MODEL_CACHE_DIR = "/home/rcardena/.cache/huggingface/"
|
121 |
BASEDIR = "/gfs/team/nlp/users/rcardena/tools/new_evals/ccl_win"
|
|
|
131 |
else:
|
132 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
133 |
|
134 |
+
results = []
|
135 |
+
aggregator = None
|
136 |
+
if use_aggregator:
|
137 |
+
np.random.seed(42)
|
138 |
+
aggregator = scoring.BootstrapAggregator()
|
139 |
+
|
140 |
tokenizer = AutoTokenizer.from_pretrained("roberta-large")
|
141 |
|
142 |
model = AutoModelForSequenceClassification.from_pretrained(os.path.join(BASEDIR,dataset))
|
|
|
156 |
probs = torch.softmax(output.logits,dim=-1).detach().cpu().numpy()
|
157 |
scores.extend(probs[:,0].tolist())
|
158 |
#
|
159 |
+
|
160 |
offset = 0
|
161 |
for _len in len_by_sample:
|
162 |
+
score = float(np.mean(scores[offset:offset+_len]))
|
163 |
+
if use_aggregator:
|
164 |
+
aggregator.add_score({"loc_coh_ccl": score})
|
165 |
+
else:
|
166 |
+
results.append(score)
|
167 |
offset += _len
|
168 |
#
|
169 |
+
outres = {}
|
170 |
+
if use_aggregator:
|
171 |
+
res = aggregator.aggregate()
|
172 |
+
for k in res: outres[k] = res[k].mid
|
173 |
+
else:
|
174 |
+
outres = {"loc_coh_ccl": results}
|
175 |
+
|
176 |
+
return outres
|