svenwey committed on
Commit 796bd91 · 1 Parent(s): 41ade47

various cleanups

Files changed (1)
  1. logmetric.py +11 -26
logmetric.py CHANGED
@@ -102,10 +102,8 @@ class LogMetric(evaluate.Metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
-        # Find all timestrings in the log
-        # pred_timestrings = self.timestamp_pattern.findall(pred)
+        # Split log on timestamps
         pred_split_log = self.timestamp_pattern.split(pred)
-        # ref_timestrings = self.timestamp_pattern.findall(ref)
         ref_split_log = self.timestamp_pattern.split(ref)
 
         # This should alwas hold (safety feature)
@@ -126,12 +124,6 @@ class LogMetric(evaluate.Metric):
         # The number of logentries of the reference/prediction which has more/less entries/timestamps
         max_logentries = max(len(pred_logentries), len(ref_logentries))
         min_logentries = min(len(pred_logentries), len(ref_logentries))
-        # # print("pred_logentries: ", pred_logentries)
-        # # print("ref_logentries: ", ref_logentries)
-
-
-        # # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
-
 
         # Case there are no timestamps in reference and none in prediction
         # we can compute bleu directly from original prediction (ref will be empty, but we offload this to the bleu metric)
@@ -155,16 +147,12 @@ class LogMetric(evaluate.Metric):
         matchesPatternScore = 100.0
         monotonicallyIncreasingScore = 100.0
 
-        # An array to save score per logentry
-        logmessage_scores = []
-        # TODO: Idea to penalize too long/ short logs-> add the amount of(max_len - min_len) between timestamps times score 0 at the end
         # A variable to save the previous timestamp (as datetime obj) to check monotonicity
         prev_datetime = None
         # Convert matches to datetime objects
         # TODO TODO TODO fix this:
         for i in range(min_logentries):
-            ts, pred_lm = pred_logentries[i]
-            _, ref_lm = ref_logentries[i]
+            ts = pred_logentries[i][0]
             try:
                 # Check if the format matches with the format of the first timestamp
                 # TODO!! Check this later, maybe it is too restricting for training a llm
@@ -175,29 +163,26 @@ class LogMetric(evaluate.Metric):
                 prev_datetime = cur_datetime
 
                 # If one entry doesn't fulfill the matching pattern property or the monotinicity property, set to 0 for whole log
-                if (not matchesPattern):
-                    matchesPatternScore = 0.0
-                    # # print("{} doesn't match pattern {}, setting patternScore to 0".format(ts, pred_timestring_pattern))
-                if (not monotonicallyIncreasing):
-                    monotonicallyIncreasingScore = 0.0
-                    # # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
-
+                matchesPatternScore = 0.0 if (not matchesPattern) else matchesPatternScore
+                monotonicallyIncreasingScore = 0.0 if (not monotonicallyIncreasing) else monotonicallyIncreasingScore
+
 
             except Exception as e:
                 # e.g. date format not parsable by dateutil.parser
                 matchesPatternScore = 0.0
                 monotonicallyIncreasingScore = 0.0
 
-        local_score = sentencesimilarity_metric.compute(predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
-                                                        references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
-                                                        tokenize="char")["score"]
+        # We calculate the overall local score of all the log-entries (log-messages)
+        local_score = sentencesimilarity_metric.compute(
+            predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+            references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+            tokenize="char")["score"]
 
 
 
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
         logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
+        # return weighted overall score of all the different scores
        return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
 
     def _compute(self, predictions, references):
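For context, a minimal, self-contained sketch of the (timestamp, message) pairing the changed lines rely on. The real self.timestamp_pattern and the construction of pred_logentries / ref_logentries live elsewhere in logmetric.py and are not part of this diff; the ISO-8601-style regex and the sample log below are only illustrative stand-ins.

import re

# Illustrative stand-in for self.timestamp_pattern. The capturing group is what
# makes re.split keep the timestamps in the result.
timestamp_pattern = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")

log = ("2024-01-01 10:00:00 service started\n"
       "2024-01-01 10:00:05 request handled\n")

# ['', '2024-01-01 10:00:00', ' service started\n',
#      '2024-01-01 10:00:05', ' request handled\n']
split_log = timestamp_pattern.split(log)

# Pair each timestamp with the message that follows it. This mirrors the
# (timestamp, message) tuples that pred_logentries / ref_logentries appear to
# hold, which is why the diff reads the timestamp as pred_logentries[i][0]
# and the message as t[1].
logentries = list(zip(split_log[1::2], split_log[2::2]))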
 
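And a back-of-the-envelope sketch of how the weights in the final return line combine, assuming (as the 100.0 defaults and the BLEU-style score mentioned in the comments suggest) that all three sub-scores live on a 0-100 scale. The numbers are made up for illustration.

# Hypothetical sub-scores on a 0-100 scale (illustrative values only).
monotonicallyIncreasingScore = 100.0  # timestamps never decreased
matchesPatternScore = 0.0             # at least one timestamp broke the expected format
local_score = 80.0                    # char-level similarity of the paired log messages

# Extra or missing log entries are weighted in as similarity 0 via min/max.
min_logentries, max_logentries = 4, 5
logmessage_aggregated_score = (min_logentries / max_logentries) * local_score  # 64.0

overall = (0.2 * monotonicallyIncreasingScore
           + 0.1 * matchesPatternScore
           + 0.7 * logmessage_aggregated_score)
# overall == 64.8: dominated by message similarity, with smaller weights on
# monotonicity (0.2) and timestamp-format consistency (0.1).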