svenwey committed
Commit 41ade47 · 1 Parent(s): 73e4b59

Compute the BLEU scores once for all prediction-references, outside of the loop, for efficiency
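In other words, the per-entry loop that called sacrebleu.compute on each log-message pair is replaced by a single batched call over all pairs. Below is a minimal sketch of the two patterns with toy messages and illustrative variable names; only the Hugging Face evaluate calls (evaluate.load("sacrebleu"), compute(..., tokenize="char")) mirror the actual code.

import evaluate

# Toy log messages with timestamps already stripped -- illustrative only
pred_msgs = ["Starting service A", "Connection established", "Shutting down"]
ref_msgs = ["Starting service A", "Connection established", "Shutdown complete"]

sacrebleu = evaluate.load("sacrebleu")

# Old pattern: one compute() call per message pair, averaged afterwards
per_pair_scores = [
    sacrebleu.compute(predictions=[p], references=[r], tokenize="char")["score"]
    for p, r in zip(pred_msgs, ref_msgs)
]
mean_of_pairs = sum(per_pair_scores) / len(per_pair_scores)

# New pattern: a single batched compute() over all pairs
batched_score = sacrebleu.compute(predictions=pred_msgs, references=ref_msgs, tokenize="char")["score"]

Note that sacrebleu returns a corpus-level score, so the batched result is generally not numerically identical to the mean of per-pair scores, and the diff also drops the exact-match fallback for messages shorter than four characters; the change trades a small behavioral difference for far fewer metric calls.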

Files changed (1):
1. logmetric.py (+8 -15)
logmetric.py CHANGED

@@ -69,7 +69,7 @@ class LogMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
-    sacrebleu = evaluate.load("sacrebleu")
+    sentencesimilarity_metric = evaluate.load("sacrebleu")
 
 
     def _info(self):
@@ -98,7 +98,7 @@ class LogMetric(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def getLogMetric(self, pred : str, ref : str, sacrebleu):
+    def getLogMetric(self, pred : str, ref : str, sentencesimilarity_metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
@@ -188,21 +188,14 @@
         matchesPatternScore = 0.0
         monotonicallyIncreasingScore = 0.0
 
-            # If the maximum length of the two log-message is below 4, BLEU doesn't work. We use exact match in this case
-            if(max(len(pred_lm),len(ref_lm)) < 4):
-                local_bleu_score = 100.0 if pred_lm == ref_lm else 0.0
-            else:
-                local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
-
-            logmessage_scores.append(local_bleu_score)
-            # # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
+        local_score = sentencesimilarity_metric.compute(predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+                                                        references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+                                                        tokenize="char")["score"]
+
 
 
-        # # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
-        # TODO: remove later. Used only for testing purposes
-        assert(len(logmessage_scores) == min_logentries)
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
-        logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
+        logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
         # # print("aggregate the scores: result", logmessage_aggregated_score)
         # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
@@ -213,7 +206,7 @@
         # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
 
         t_before_logmetric = time.perf_counter()
-        timestamp_score = np.mean([self.getLogMetric(p,r, self.sacrebleu) for p,r in zip(predictions,references)])
+        timestamp_score = np.mean([self.getLogMetric(p,r, self.sentencesimilarity_metric) for p,r in zip(predictions,references)])
         t_after_logmetric = time.perf_counter()
 
         logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
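For reference, the aggregation kept at the end of the third hunk weights the batched similarity score by the ratio of matched log entries (so entries missing from the shorter log effectively count as 0) before folding it into the final weighted sum. A small worked sketch with made-up numbers; the 0-100 scale assumed for the two auxiliary scores is not visible in this diff.

min_logentries, max_logentries = 8, 10    # e.g. prediction has 8 log entries, reference has 10
local_score = 72.5                        # hypothetical batched sacrebleu score (0-100)

# entries beyond min_logentries are effectively scored 0 via the min/max ratio
logmessage_aggregated_score = (min_logentries / max_logentries) * local_score   # 58.0

# assumed to be on the same 0-100 scale as sacrebleu (not shown in this hunk)
monotonicallyIncreasingScore = 100.0
matchesPatternScore = 100.0

final_score = (0.2 * monotonicallyIncreasingScore
               + 0.1 * matchesPatternScore
               + 0.7 * logmessage_aggregated_score)   # 20.0 + 10.0 + 40.6 = 70.6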
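The timestamp_regex context lines at the top of the diff are what drive the splitting into log entries (see the TODO in the last hunk). A minimal sketch of what the pattern extracts from a toy two-line log, using the same regex and re.MULTILINE flag as the class attribute:

import re

timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)

# toy log with one timestamp at the start of each line -- illustrative only
log = (
    "2024-05-01 12:34:56 Starting service A\n"
    "2024-05-01T12:35:02.123Z Connection established\n"
)

# findall returns the captured timestamp group for each matching line start
print(timestamp_pattern.findall(log))
# ['2024-05-01 12:34:56', '2024-05-01T12:35:02.123Z']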