Compute bleu_scores once for all prediction-reference pairs outside of the loop for efficiency
logmetric.py CHANGED (+8 -15)
@@ -69,7 +69,7 @@ class LogMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
-
+    sentencesimilarity_metric = evaluate.load("sacrebleu")
 
 
     def _info(self):
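The new class attribute loads the sacrebleu metric once, when the class is defined, so every scoring call reuses the same object instead of re-creating it. A minimal sketch of that caching pattern, with a toy class and toy strings standing in for real log messages (the names below are illustrative, not from the repo):

    import evaluate

    class CachedBleu:
        # Loaded once at class-definition time, mirroring sentencesimilarity_metric
        # in the diff; every instance reuses the same metric object.
        sentencesimilarity_metric = evaluate.load("sacrebleu")

        def score(self, pred: str, ref: str) -> float:
            # Character-level tokenization, as in the tokenize="char" calls in the diff.
            return self.sentencesimilarity_metric.compute(
                predictions=[pred], references=[ref], tokenize="char"
            )["score"]

    print(CachedBleu().score("error: disk full", "error: disk is full"))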
@@ -98,7 +98,7 @@ class LogMetric(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def getLogMetric(self, pred : str, ref : str,
+    def getLogMetric(self, pred : str, ref : str, sentencesimilarity_metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
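getLogMetric compares the two logs entry by entry, which assumes the prediction and reference have already been split into per-entry (timestamp, message) tuples (the pred_logentries / ref_logentries used in the next hunk). A hedged sketch of how the timestamp_pattern defined above could produce such tuples; split_log_entries is a hypothetical helper, not the repo's actual implementation:

    import re

    timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
    timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)

    def split_log_entries(log: str):
        # Hypothetical helper: pair each matched timestamp with the text that
        # follows it, up to the next timestamp.
        matches = list(timestamp_pattern.finditer(log))
        entries = []
        for i, m in enumerate(matches):
            end = matches[i + 1].start() if i + 1 < len(matches) else len(log)
            entries.append((m.group(1), log[m.end():end].strip()))
        return entries

    log = "2024-01-15 12:30:45 Server started\n2024-01-15 12:30:46,123 Listening on port 8080\n"
    print(split_log_entries(log))
    # [('2024-01-15 12:30:45', 'Server started'), ('2024-01-15 12:30:46,123', 'Listening on port 8080')]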
@@ -188,21 +188,14 @@ class LogMetric(evaluate.Metric):
         matchesPatternScore = 0.0
         monotonicallyIncreasingScore = 0.0
 
-
-
-
-
-            local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
-
-            logmessage_scores.append(local_bleu_score)
-            # # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
+        local_score = sentencesimilarity_metric.compute(predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+                                                        references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+                                                        tokenize="char")["score"]
+
 
 
-        # # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
-        # TODO: remove later. Used only for testing purposes
-        assert(len(logmessage_scores) == min_logentries)
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
-        logmessage_aggregated_score = ((min_logentries / max_logentries) *
+        logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
         # # print("aggregate the scores: result", logmessage_aggregated_score)
         # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
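This hunk is the core of the commit: the old code computed a BLEU score for every (pred_lm, ref_lm) pair inside a loop and collected the results in logmessage_scores; the new code makes a single sacrebleu call over the first min_logentries message parts of both logs. A rough sketch of the two call patterns on toy messages (not from the repo); note that the batched call returns one corpus-level BLEU, which is close to, but not in general identical to, the mean of per-entry scores:

    import evaluate
    import numpy as np

    sacrebleu = evaluate.load("sacrebleu")

    pred_msgs = ["Server started", "Listening on port 8080"]   # toy message parts
    ref_msgs  = ["Server started", "Listening on port 8081"]

    # Old approach: one compute() call per log entry, then aggregate the scores.
    per_entry = [
        sacrebleu.compute(predictions=[p], references=[r], tokenize="char")["score"]
        for p, r in zip(pred_msgs, ref_msgs)
    ]
    old_style = np.mean(per_entry)

    # New approach: a single compute() call over all entries at once.
    new_style = sacrebleu.compute(predictions=pred_msgs, references=ref_msgs,
                                  tokenize="char")["score"]

    print(old_style, new_style)

The (min_logentries / max_logentries) factor then scales the result down when the two logs contain different numbers of entries, so missing or extra entries are effectively scored as 0, per the comment retained in the hunk.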
@@ -213,7 +206,7 @@ class LogMetric(evaluate.Metric):
         # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
 
         t_before_logmetric = time.perf_counter()
-        timestamp_score = np.mean([self.getLogMetric(p,r, self.
+        timestamp_score = np.mean([self.getLogMetric(p,r, self.sentencesimilarity_metric) for p,r in zip(predictions,references)])
         t_after_logmetric = time.perf_counter()
 
         logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
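For context, this is roughly how the metric would be consumed once the change is in place; the local path passed to evaluate.load is hypothetical and depends on where logmetric.py lives in the repo:

    import evaluate

    # Hypothetical local path; evaluate.load also accepts a Hub id for published metrics.
    logmetric = evaluate.load("./logmetric.py")

    predictions = ["2024-01-15 12:30:45 Server started\n2024-01-15 12:30:46 Ready"]
    references  = ["2024-01-15 12:30:45 Server started\n2024-01-15 12:30:47 Ready"]

    # Internally, the per-pair getLogMetric calls are timed with time.perf_counter
    # and averaged with np.mean, as shown in the last hunk.
    result = logmetric.compute(predictions=predictions, references=references)
    print(result)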