Spaces:

svenwey
/

logmetric

Sleeping

App Files Files Community

svenwey committed on Jun 14, 2024

Commit

59248ff

•

1 Parent(s): c9350c0

add some print statements for simple debugging, change to char-based tokenization

Browse files

Files changed (1) hide show

logmetric.py +19 -1

logmetric.py CHANGED Viewed

@@ -126,6 +126,11 @@ class LogMetric(evaluate.Metric):
         # The number of logentries of the reference/prediction which has more/less entries/timestamps
         max_logentries = max(len(pred_logentries), len(ref_logentries))
         min_logentries = min(len(pred_logentries), len(ref_logentries))
         # Case there are no timestamps in reference and none in prediction
@@ -172,20 +177,33 @@ class LogMetric(evaluate.Metric):
                 # If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
                 if (not matchesPattern):
                     matchesPatternScore = 0.0
                 if (not monotonicallyIncreasing):
                     monotonicallyIncreasingScore = 0.0
             except Exception as e:
                 # e.g. date format not parsable by dateutil.parser
                 matchesPatternScore = 0.0
                 monotonicallyIncreasingScore = 0.0
-            logmessage_scores.append(sacrebleu.compute(predictions=[pred_lm], references=[ref_lm])["score"])
         # TODO: remove later. Used only for testing purposes
         assert(len(logmessage_scores) == min_logentries)
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
         logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
         # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score

         # The number of logentries of the reference/prediction which has more/less entries/timestamps
         max_logentries = max(len(pred_logentries), len(ref_logentries))
         min_logentries = min(len(pred_logentries), len(ref_logentries))
+        # # print("pred_logentries: ", pred_logentries)
+        # # print("ref_logentries: ", ref_logentries)
+        # # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
         # Case there are no timestamps in reference and none in prediction
                 # If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
                 if (not matchesPattern):
                     matchesPatternScore = 0.0
+                    # # print("{} doesn't match pattern {}, setting patternScore to 0".format(ts, pred_timestring_pattern))
                 if (not monotonicallyIncreasing):
                     monotonicallyIncreasingScore = 0.0
+                    # # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
             except Exception as e:
                 # e.g. date format not parsable by dateutil.parser
                 matchesPatternScore = 0.0
                 monotonicallyIncreasingScore = 0.0
+            # If the maximum length of the two log-messages is below 4, BLEU doesn't work. We use exact match in this case
+            if(max(len(pred_lm),len(ref_lm)) < 4):
+                local_bleu_score = 100.0 if pred_lm == ref_lm else 0.0
+            else:
+                local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
+            logmessage_scores.append(local_bleu_score)
+            # # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
+        # # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
         # TODO: remove later. Used only for testing purposes
         assert(len(logmessage_scores) == min_logentries)
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
         logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
+        # # print("aggregate the scores: result", logmessage_aggregated_score)
         # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score