add some print statements for simple debugging, change to char-based tokenization
Browse files- logmetric.py +19 -1
logmetric.py
CHANGED
@@ -126,6 +126,11 @@ class LogMetric(evaluate.Metric):
|
|
126 |
# Number of log entries (timestamps) in the longer (max) and in the shorter (min) of reference and prediction
|
127 |
max_logentries = max(len(pred_logentries), len(ref_logentries))
|
128 |
min_logentries = min(len(pred_logentries), len(ref_logentries))
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
# Case there are no timestamps in reference and none in prediction
|
@@ -172,20 +177,33 @@ class LogMetric(evaluate.Metric):
|
|
172 |
# If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
|
173 |
if (not matchesPattern):
|
174 |
matchesPatternScore = 0.0
|
|
|
175 |
if (not monotonicallyIncreasing):
|
176 |
monotonicallyIncreasingScore = 0.0
|
|
|
|
|
177 |
|
178 |
except Exception as e:
|
179 |
# e.g. date format not parsable by dateutil.parser
|
180 |
matchesPatternScore = 0.0
|
181 |
monotonicallyIncreasingScore = 0.0
|
182 |
|
183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
|
|
185 |
# TODO: remove later. Used only for testing purposes
|
186 |
assert(len(logmessage_scores) == min_logentries)
|
187 |
# Aggregate the BLEU scores: scaling the mean by min/max counts every unmatched log entry as a score of 0
|
188 |
logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
|
|
|
189 |
# Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
|
190 |
return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
|
191 |
|
|
|
126 |
# Number of log entries (timestamps) in the longer (max) and in the shorter (min) of reference and prediction
|
127 |
max_logentries = max(len(pred_logentries), len(ref_logentries))
|
128 |
min_logentries = min(len(pred_logentries), len(ref_logentries))
|
129 |
+
# # print("pred_logentries: ", pred_logentries)
|
130 |
+
# # print("ref_logentries: ", ref_logentries)
|
131 |
+
|
132 |
+
|
133 |
+
# # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
|
134 |
|
135 |
|
136 |
# Case there are no timestamps in reference and none in prediction
|
|
|
177 |
# If one entry doesn't fulfill the matching pattern property or the monotonicity property, set to 0 for whole log
|
178 |
if (not matchesPattern):
|
179 |
matchesPatternScore = 0.0
|
180 |
+
# # print("{} doesn't match pattern {}, setting patternScore to 0".format(ts, pred_timestring_pattern))
|
181 |
if (not monotonicallyIncreasing):
|
182 |
monotonicallyIncreasingScore = 0.0
|
183 |
+
# # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
|
184 |
+
|
185 |
|
186 |
except Exception as e:
|
187 |
# e.g. date format not parsable by dateutil.parser
|
188 |
matchesPatternScore = 0.0
|
189 |
monotonicallyIncreasingScore = 0.0
|
190 |
|
191 |
+
# If the longer of the two log messages has a length below 4, BLEU doesn't work; we use exact match in this case
|
192 |
+
if(max(len(pred_lm),len(ref_lm)) < 4):
|
193 |
+
local_bleu_score = 100.0 if pred_lm == ref_lm else 0.0
|
194 |
+
else:
|
195 |
+
local_bleu_score = sacrebleu.compute(predictions=[pred_lm], references=[ref_lm], tokenize="char")["score"]
|
196 |
+
|
197 |
+
logmessage_scores.append(local_bleu_score)
|
198 |
+
# # print(("calculates local bleu score between :{} and {}. Result -> {}").format(repr(pred_lm),repr(ref_lm), local_bleu_score))
|
199 |
+
|
200 |
|
201 |
+
# # print("Ended per-entry checks. All scores: {}".format(logmessage_scores))
|
202 |
# TODO: remove later. Used only for testing purposes
|
203 |
assert(len(logmessage_scores) == min_logentries)
|
204 |
# Aggregate the BLEU scores: scaling the mean by min/max counts every unmatched log entry as a score of 0
|
205 |
logmessage_aggregated_score = ((min_logentries / max_logentries) * np.mean(logmessage_scores))
|
206 |
+
# # print("aggregate the scores: result", logmessage_aggregated_score)
|
207 |
# Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
|
208 |
return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
|
209 |
|