various cleanups
logmetric.py (+11 -26)
```diff
@@ -102,10 +102,8 @@ class LogMetric(evaluate.Metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
-        #
-        # pred_timestrings = self.timestamp_pattern.findall(pred)
+        # Split log on timestamps
         pred_split_log = self.timestamp_pattern.split(pred)
-        # ref_timestrings = self.timestamp_pattern.findall(ref)
         ref_split_log = self.timestamp_pattern.split(ref)
 
         # This should alwas hold (safety feature)
```
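The new comment reflects what `re.split` does when the pattern contains a capturing group (which `timestamp_pattern` presumably does, since the timestamps are used later): the matched timestamps stay in the output, so the log decomposes into alternating message/timestamp chunks. A minimal sketch of that behaviour, assuming a hypothetical ISO-8601-style pattern (the real `timestamp_pattern` is defined elsewhere in logmetric.py and may differ):

```python
import re

# Hypothetical stand-in; the actual timestamp_pattern in logmetric.py may differ.
timestamp_pattern = re.compile(r"(\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2})")

log = "2024-01-01 10:00:00 service started\n2024-01-01 10:00:05 ready"
parts = timestamp_pattern.split(log)
# parts[0] is whatever precedes the first timestamp (here an empty string);
# after that the list alternates timestamp, message, timestamp, message, ...
print(parts)
# ['', '2024-01-01 10:00:00', ' service started\n', '2024-01-01 10:00:05', ' ready']
```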
```diff
@@ -126,12 +124,6 @@ class LogMetric(evaluate.Metric):
         # The number of logentries of the reference/prediction which has more/less entries/timestamps
         max_logentries = max(len(pred_logentries), len(ref_logentries))
         min_logentries = min(len(pred_logentries), len(ref_logentries))
-        # # print("pred_logentries: ", pred_logentries)
-        # # print("ref_logentries: ", ref_logentries)
-
-
-        # # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
-
 
         # Case there are no timestamps in reference and none in prediction
         # we can compute bleu directly from original prediction (ref will be empty, but we offload this to the bleu metric)
```
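How the split lists become the `(timestamp, message)` tuples that `pred_logentries`/`ref_logentries` hold is outside this diff; a hedged sketch of one way to pair them up, with the helper name being an assumption:

```python
def to_logentries(split_log):
    # split_log[0] is any preamble before the first timestamp; from index 1
    # onward the list alternates timestamp, message, timestamp, message, ...
    # zip truncates a trailing timestamp that has no message after it.
    return list(zip(split_log[1::2], split_log[2::2]))

# ['', '10:00:00', ' a', '10:00:05', ' b']  ->  [('10:00:00', ' a'), ('10:00:05', ' b')]
```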
```diff
@@ -155,16 +147,12 @@ class LogMetric(evaluate.Metric):
         matchesPatternScore = 100.0
         monotonicallyIncreasingScore = 100.0
 
-        # An array to save score per logentry
-        logmessage_scores = []
-        # TODO: Idea to penalize too long/ short logs-> add the amount of(max_len - min_len) between timestamps times score 0 at the end
         # A variable to save the previous timestamp (as datetime obj) to check monotonicity
         prev_datetime = None
         # Convert matches to datetime objects
         # TODO TODO TODO fix this:
         for i in range(min_logentries):
-            ts
-            _, ref_lm = ref_logentries[i]
+            ts = pred_logentries[i][0]
             try:
                 # Check if the format matches with the format of the first timestamp
                 # TODO!! Check this later, maybe it is too restricting for training a llm
```
```diff
@@ -175,29 +163,26 @@ class LogMetric(evaluate.Metric):
                 prev_datetime = cur_datetime
 
                 # If one entry doesn't fulfill the matching pattern property or the monotinicity property, set to 0 for whole log
-                if (not matchesPattern)
-
-
-                if (not monotonicallyIncreasing):
-                    monotonicallyIncreasingScore = 0.0
-                    # # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
-
+                matchesPatternScore = 0.0 if (not matchesPattern) else matchesPatternScore
+                monotonicallyIncreasingScore = 0.0 if (not monotonicallyIncreasing) else monotonicallyIncreasingScore
+
 
             except Exception as e:
                 # e.g. date format not parsable by dateutil.parser
                 matchesPatternScore = 0.0
                 monotonicallyIncreasingScore = 0.0
 
-
-
-
+        # We calculate the overall local score of all the log-entries (log-messages)
+        local_score = sentencesimilarity_metric.compute(
+            predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+            references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+            tokenize="char")["score"]
 
 
 
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
         logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
-        #
-        # Correct amt of timestrings, monotonically increasing, consistent + (by dateutil.parser) parsable format
+        # return weighted overall score of all the different scores
         return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
 
     def _compute(self, predictions, references):
```
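Taken together, the per-entry loop now reduces to the standalone approximation below (assuming dateutil.parser, as the comments indicate; the format-consistency check against the first timestamp is elided since its implementation is not part of this diff):

```python
from dateutil import parser as date_parser

def check_timestamps(timestamps):
    matchesPatternScore = 100.0
    monotonicallyIncreasingScore = 100.0
    prev_datetime = None
    for ts in timestamps:
        try:
            cur_datetime = date_parser.parse(ts)
            # zero the monotonicity score if any timestamp goes backwards
            if prev_datetime is not None and cur_datetime < prev_datetime:
                monotonicallyIncreasingScore = 0.0
            prev_datetime = cur_datetime
        except Exception:
            # e.g. date format not parsable by dateutil.parser
            matchesPatternScore = 0.0
            monotonicallyIncreasingScore = 0.0
    return matchesPatternScore, monotonicallyIncreasingScore
```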
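The `compute(..., tokenize="char")["score"]` call matches the sacreBLEU interface in `evaluate` (a 0-100 "score"), which would put all three components on the same scale; how `sentencesimilarity_metric` is loaded is not shown here, so that is an inference. A worked example of the final weighting, assuming a prediction with 3 of the reference's 4 log entries and a char-level message score of 80.0:

```python
monotonicallyIncreasingScore = 100.0   # timestamps strictly in order
matchesPatternScore = 100.0            # every timestamp parsable, format consistent
local_score = 80.0                     # assumed char-level similarity of the messages
min_logentries, max_logentries = 3, 4

# the missing entry is weighted in as a score of 0
logmessage_aggregated_score = (min_logentries / max_logentries) * local_score  # 60.0

score = (0.2 * monotonicallyIncreasingScore
         + 0.1 * matchesPatternScore
         + 0.7 * logmessage_aggregated_score)
print(score)  # 0.2*100 + 0.1*100 + 0.7*60 = 72.0
```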