Add a SacreBLEU score computed on the cleaned log messages (ints and floats replaced by placeholder tokens)
Browse files- logmetric.py +23 -5
logmetric.py
CHANGED
@@ -71,6 +71,13 @@ class LogMetric(evaluate.Metric):
|
|
71 |
# Constant regex to get timestrings
|
72 |
timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
|
73 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
|
75 |
|
76 |
|
@@ -153,6 +160,10 @@ class LogMetric(evaluate.Metric):
|
|
153 |
|
154 |
return self.smapeScore(pred_lines_amt, ref_lines_amt)
|
155 |
|
|
|
|
|
|
|
|
|
156 |
|
157 |
# Get different scores regarding the content of a log-message
|
158 |
def getLineContentScore(self, pred_logMessages, ref_logMessages):
|
@@ -163,11 +174,17 @@ class LogMetric(evaluate.Metric):
|
|
163 |
|
164 |
smape_length_score = self.get_length_score(pred_logMessages, ref_logMessages)
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
-
return sacrebleu_score, smape_length_score
|
171 |
|
172 |
# get different scores regarding the timestamp
|
173 |
def getTimestampsScore(self, pred_timestamps, ref_timestamps):
|
@@ -252,7 +269,7 @@ class LogMetric(evaluate.Metric):
|
|
252 |
pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
|
253 |
ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
|
254 |
|
255 |
-
linecontent_sacrebleu, linecontentlength_difference_SMAPE = self.getLineContentScore(pred_logMessages, ref_logMessages)
|
256 |
|
257 |
timestamps_difference_SMAPE, timestamps_formatConsistency_absolute, timestamps_monotinicity_absolute = self.getTimestampsScore(pred_timestamps, ref_timestamps)
|
258 |
|
@@ -264,6 +281,7 @@ class LogMetric(evaluate.Metric):
|
|
264 |
return {"linecount_difference_SMAPE_score": linecount_difference_SMAPE,
|
265 |
"linecontentlength_difference_SMAPE_score": linecontentlength_difference_SMAPE,
|
266 |
"linecontent_sacrebleu_score": linecontent_sacrebleu,
|
|
|
267 |
"timestamps_SMAPE_difference_score": timestamps_difference_SMAPE,
|
268 |
"timestamps_formatConsistency_score": timestamps_formatConsistency_absolute,
|
269 |
"timestamps_monotinicity_score": timestamps_monotinicity_absolute
|
|
|
71 |
# Constant regex to get timestrings
|
72 |
timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
|
73 |
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
74 |
+
|
75 |
+
int_regex = r'(\s|^)(-?\d+)(\s|$)'
|
76 |
+
int_pattern = re.compile(int_regex)
|
77 |
+
|
78 |
+
float_regex = r'(\s|^)(-?\d+\.\d+)(\s|$)'
|
79 |
+
float_pattern = re.compile(float_regex)
|
80 |
+
|
81 |
sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
|
82 |
|
83 |
|
|
|
160 |
|
161 |
return self.smapeScore(pred_lines_amt, ref_lines_amt)
|
162 |
|
163 |
+
def replaceNumbers(self, text: str) -> str:
    """Replace standalone integers and floats in ``text`` with placeholder tokens.

    A number only counts as standalone when it is delimited by whitespace or
    by the start/end of the string on both sides; digits embedded in a word
    (``port8080``) or in a dotted version (``v1.2.3``) are left untouched.

    Args:
        text: A single log message.

    Returns:
        The message with every standalone integer replaced by ``<|INT|>`` and
        every standalone float replaced by ``<|FLOAT|>``. The surrounding
        whitespace is preserved.
    """
    # Both patterns capture the delimiter before the number as group 1 and the
    # delimiter after it as group 3, so the replacement re-emits them around
    # the token. (The float template previously placed group 3 *inside* the
    # token, producing e.g. "<|FLOAT| >".)
    #
    # Because the trailing delimiter is consumed by a match and matches cannot
    # overlap, a single pass skips every second number in a whitespace
    # separated run ("1 2 3" -> "<|INT|> 2 <|INT|>"). After one pass no two
    # remaining numbers are adjacent, so a second pass replaces the rest.
    for _ in range(2):
        text = self.int_pattern.sub(r'\1<|INT|>\3', text)
        text = self.float_pattern.sub(r'\1<|FLOAT|>\3', text)
    return text
|
167 |
|
168 |
# Get different scores regarding the content of a log-message
|
169 |
def getLineContentScore(self, pred_logMessages, ref_logMessages):
|
|
|
174 |
|
175 |
smape_length_score = self.get_length_score(pred_logMessages, ref_logMessages)
|
176 |
|
177 |
+
vectorized_replaceNumbers = np.vectorize(self.replaceNumbers)
|
178 |
+
|
179 |
+
cleaned_pred_logMessages = vectorized_replaceNumbers(pred_logMessages)
|
180 |
+
cleaned_ref_logMessages = vectorized_replaceNumbers(ref_logMessages)
|
181 |
+
print("before:", pred_logMessages, "\n")
|
182 |
+
print("cleaned:", cleaned_pred_logMessages)
|
183 |
+
|
184 |
+
sacrebleu_withoutExplicitNumbers_score = self.sacrebleu_metric.compute(predictions=cleaned_pred_logMessages, references=cleaned_ref_logMessages)["score"] / 100.0
|
185 |
+
|
186 |
|
187 |
+
return sacrebleu_score, sacrebleu_withoutExplicitNumbers_score, smape_length_score
|
188 |
|
189 |
# get different scores regarding the timestamp
|
190 |
def getTimestampsScore(self, pred_timestamps, ref_timestamps):
|
|
|
269 |
pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
|
270 |
ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
|
271 |
|
272 |
+
linecontent_sacrebleu, linecontent_sacrebleu_withoutExplicitNumbers, linecontentlength_difference_SMAPE = self.getLineContentScore(pred_logMessages, ref_logMessages)
|
273 |
|
274 |
timestamps_difference_SMAPE, timestamps_formatConsistency_absolute, timestamps_monotinicity_absolute = self.getTimestampsScore(pred_timestamps, ref_timestamps)
|
275 |
|
|
|
281 |
return {"linecount_difference_SMAPE_score": linecount_difference_SMAPE,
|
282 |
"linecontentlength_difference_SMAPE_score": linecontentlength_difference_SMAPE,
|
283 |
"linecontent_sacrebleu_score": linecontent_sacrebleu,
|
284 |
+
"linecontent_sacrebleu_withoutExplicitNumbers_score": linecontent_sacrebleu_withoutExplicitNumbers,
|
285 |
"timestamps_SMAPE_difference_score": timestamps_difference_SMAPE,
|
286 |
"timestamps_formatConsistency_score": timestamps_formatConsistency_absolute,
|
287 |
"timestamps_monotinicity_score": timestamps_monotinicity_absolute
|