svenwey committed
Commit 41ade47 · 1 Parent(s): 73e4b59

Compute the BLEU scores once for all prediction-references, outside of the loop, for efficiency
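In other words, the per-entry loop that called sacrebleu.compute on each log-message pair is replaced by a single batched call over all pairs. Below is a minimal sketch of the two patterns with toy messages and illustrative variable names; only the Hugging Face evaluate calls (evaluate.load("sacrebleu"), compute(..., tokenize="char")) mirror the actual code.

import evaluate

# Toy log messages with timestamps already stripped -- illustrative only
pred_msgs = ["Starting service A", "Connection established", "Shutting down"]
ref_msgs = ["Starting service A", "Connection established", "Shutdown complete"]

sacrebleu = evaluate.load("sacrebleu")

# Old pattern: one compute() call per message pair, averaged afterwards
per_pair_scores = [
    sacrebleu.compute(predictions=[p], references=[r], tokenize="char")["score"]
    for p, r in zip(pred_msgs, ref_msgs)
]
mean_of_pairs = sum(per_pair_scores) / len(per_pair_scores)

# New pattern: a single batched compute() over all pairs
batched_score = sacrebleu.compute(predictions=pred_msgs, references=ref_msgs, tokenize="char")["score"]

Note that sacrebleu returns a corpus-level score, so the batched result is generally not numerically identical to the mean of per-pair scores, and the diff also drops the exact-match fallback for messages shorter than four characters; the change trades a small behavioral difference for far fewer metric calls.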

Files changed (1):
1. logmetric.py (+8 -15)
logmetric.py CHANGED

@@ -69,7 +69,7 @@ class LogMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
     timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
-    sacrebleu = evaluate.load("sacrebleu")
+    sentencesimilarity_metric = evaluate.load("sacrebleu")
 
 
     def _info(self):
@@ -98,7 +98,7 @@ class LogMetric(evaluate.Metric):
         # TODO: Download external resources if needed
         pass
 
-    def getLogMetric(self, pred : str, ref : str, sacrebleu):
+    def getLogMetric(self, pred : str, ref : str, sentencesimilarity_metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
@@ -188,21 +188,14 @@
         matchesPatternScore = 0.0
         monotonicallyIncreasingScore = 0.0
 
-            # If the maximum length of the two log-message is below 4, BLEU doesn't work. We use exact match in this case
-            if(max(len(pred_lm),len(ref_lm)) < 4):
-                local_bleu_score = 100.0 if pred_lm == ref_lm else 0.0
-            else:
-                local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
-
-            logmessage_scores.append(local_bleu_score)
-            # # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
+        local_score = sentencesimilarity_metric.compute(predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+                                                        references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+                                                        tokenize="char")["score"]
+
 
 
-        # # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
-        # TODO: remove later. Used only for testing purposes
-        assert(len(logmessage_scores) == min_logentries)
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
-        logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
+        logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
         # # print("aggregate the scores: result", logmessage_aggregated_score)
         # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
@@ -213,7 +206,7 @@
         # TODO: get separate log entries (split before timestamps), replace timestamps with token and compare the log entry with BLEU
 
         t_before_logmetric = time.perf_counter()
-        timestamp_score = np.mean([self.getLogMetric(p,r, self.sacrebleu) for p,r in zip(predictions,references)])
+        timestamp_score = np.mean([self.getLogMetric(p,r, self.sentencesimilarity_metric) for p,r in zip(predictions,references)])
         t_after_logmetric = time.perf_counter()
 
         logmetric_duration = f" {t_after_logmetric - t_before_logmetric:0.10f}"
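For reference, the aggregation kept at the end of the third hunk weights the batched similarity score by the ratio of matched log entries (so entries missing from the shorter log effectively count as 0) before folding it into the final weighted sum. A small worked sketch with made-up numbers; the 0-100 scale assumed for the two auxiliary scores is not visible in this diff.

min_logentries, max_logentries = 8, 10    # e.g. prediction has 8 log entries, reference has 10
local_score = 72.5                        # hypothetical batched sacrebleu score (0-100)

# entries beyond min_logentries are effectively scored 0 via the min/max ratio
logmessage_aggregated_score = (min_logentries / max_logentries) * local_score   # 58.0

# assumed to be on the same 0-100 scale as sacrebleu (not shown in this hunk)
monotonicallyIncreasingScore = 100.0
matchesPatternScore = 100.0

final_score = (0.2 * monotonicallyIncreasingScore
               + 0.1 * matchesPatternScore
               + 0.7 * logmessage_aggregated_score)   # 20.0 + 10.0 + 40.6 = 70.6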
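The timestamp_regex context lines at the top of the diff are what drive the splitting into log entries (see the TODO in the last hunk). A minimal sketch of what the pattern extracts from a toy two-line log, using the same regex and re.MULTILINE flag as the class attribute:

import re

timestamp_regex = r'^\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*'
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)

# toy log with one timestamp at the start of each line -- illustrative only
log = (
    "2024-05-01 12:34:56 Starting service A\n"
    "2024-05-01T12:35:02.123Z Connection established\n"
)

# findall returns the captured timestamp group for each matching line start
print(timestamp_pattern.findall(log))
# ['2024-05-01 12:34:56', '2024-05-01T12:35:02.123Z']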