svenwey committed on
Commit
81740ff
·
1 Parent(s): 4873587

add a sacrebleu on the cleaned log-messages (ints and floats replaced by tokens)

Browse files
Files changed (1) hide show
  1. logmetric.py +23 -5
logmetric.py CHANGED
@@ -71,6 +71,13 @@ class LogMetric(evaluate.Metric):
71
  # Constant regex to get timestrings
72
  timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
73
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
 
 
 
 
 
 
 
74
  sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
75
 
76
 
@@ -153,6 +160,10 @@ class LogMetric(evaluate.Metric):
153
 
154
  return self.smapeScore(pred_lines_amt, ref_lines_amt)
155
 
 
 
 
 
156
 
157
  # Get different scores regarding the content of a log-message
158
  def getLineContentScore(self, pred_logMessages, ref_logMessages):
@@ -163,11 +174,17 @@ class LogMetric(evaluate.Metric):
163
 
164
  smape_length_score = self.get_length_score(pred_logMessages, ref_logMessages)
165
 
166
- # Split the single log-messages (for jaccard)
167
- preds_split = [p.split() for p in pred_logMessages]
168
- refs_split = [r.split() for r in ref_logMessages]
 
 
 
 
 
 
169
 
170
- return sacrebleu_score, smape_length_score
171
 
172
  # get different scores regarding the timestamp
173
  def getTimestampsScore(self, pred_timestamps, ref_timestamps):
@@ -252,7 +269,7 @@ class LogMetric(evaluate.Metric):
252
  pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
253
  ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
254
 
255
- linecontent_sacrebleu, linecontentlength_difference_SMAPE = self.getLineContentScore(pred_logMessages, ref_logMessages)
256
 
257
  timestamps_difference_SMAPE, timestamps_formatConsistency_absolute, timestamps_monotinicity_absolute = self.getTimestampsScore(pred_timestamps, ref_timestamps)
258
 
@@ -264,6 +281,7 @@ class LogMetric(evaluate.Metric):
264
  return {"linecount_difference_SMAPE_score": linecount_difference_SMAPE,
265
  "linecontentlength_difference_SMAPE_score": linecontentlength_difference_SMAPE,
266
  "linecontent_sacrebleu_score": linecontent_sacrebleu,
 
267
  "timestamps_SMAPE_difference_score": timestamps_difference_SMAPE,
268
  "timestamps_formatConsistency_score": timestamps_formatConsistency_absolute,
269
  "timestamps_monotinicity_score": timestamps_monotinicity_absolute
 
71
  # Constant regex to get timestrings
72
  timestamp_regex = r'^\s*\[?\s*(\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)\s*\]?\s*'
73
  timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
74
+
75
+ int_regex = r'(\s|^)(-?\d+)(\s|$)'
76
+ int_pattern = re.compile(int_regex)
77
+
78
+ float_regex = r'(\s|^)(-?\d+\.\d+)(\s|$)'
79
+ float_pattern = re.compile(float_regex)
80
+
81
  sacrebleu_metric = evaluate.load("evaluate-metric/sacrebleu")
82
 
83
 
 
160
 
161
  return self.smapeScore(pred_lines_amt, ref_lines_amt)
162
 
163
def replaceNumbers(self, text: str) -> str:
    """Replace standalone integers and floats in ``text`` with placeholder tokens.

    A number is only replaced when it is delimited by whitespace or a string
    boundary on both sides, as required by ``self.int_pattern`` and
    ``self.float_pattern``. Integers become ``<|INT|>`` and floats become
    ``<|FLOAT|>``; the delimiters captured by groups 1 and 3 are kept.

    Args:
        text: A single log message.

    Returns:
        The message with every whitespace-delimited number replaced by its
        token.
    """
    substitutions = (
        (self.int_pattern, r'\1<|INT|>\3'),
        # The closing '>' is part of the token, so it has to come before the
        # trailing delimiter (it used to be emitted after it: '<|FLOAT|\3>').
        (self.float_pattern, r'\1<|FLOAT|>\3'),
    )
    for pattern, replacement in substitutions:
        # Every match consumes its trailing delimiter, so the number that
        # directly follows a replaced one cannot match in the same pass
        # ("1 2 3" would keep the "2"). Skipped numbers are never adjacent to
        # each other, hence a second pass is enough to catch all of them.
        for _ in range(2):
            text = pattern.sub(replacement, text)
    return text
167
 
168
  # Get different scores regarding the content of a log-message
169
  def getLineContentScore(self, pred_logMessages, ref_logMessages):
 
174
 
175
  smape_length_score = self.get_length_score(pred_logMessages, ref_logMessages)
176
 
177
+ vectorized_replaceNumbers = np.vectorize(self.replaceNumbers)
178
+
179
+ cleaned_pred_logMessages = vectorized_replaceNumbers(pred_logMessages)
180
+ cleaned_ref_logMessages = vectorized_replaceNumbers(ref_logMessages)
181
+ print("before:", pred_logMessages, "\n")
182
+ print("cleaned:", cleaned_pred_logMessages)
183
+
184
+ sacrebleu_withoutExplicitNumbers_score = self.sacrebleu_metric.compute(predictions=cleaned_pred_logMessages, references=cleaned_ref_logMessages)["score"] / 100.0
185
+
186
 
187
+ return sacrebleu_score, sacrebleu_withoutExplicitNumbers_score, smape_length_score
188
 
189
  # get different scores regarding the timestamp
190
  def getTimestampsScore(self, pred_timestamps, ref_timestamps):
 
269
  pred_logMessages += (max_logentries - len(pred_logMessages)) * [" "]
270
  ref_logMessages += (max_logentries- len(ref_logMessages)) * [" "]
271
 
272
+ linecontent_sacrebleu, linecontent_sacrebleu_withoutExplicitNumbers, linecontentlength_difference_SMAPE = self.getLineContentScore(pred_logMessages, ref_logMessages)
273
 
274
  timestamps_difference_SMAPE, timestamps_formatConsistency_absolute, timestamps_monotinicity_absolute = self.getTimestampsScore(pred_timestamps, ref_timestamps)
275
 
 
281
  return {"linecount_difference_SMAPE_score": linecount_difference_SMAPE,
282
  "linecontentlength_difference_SMAPE_score": linecontentlength_difference_SMAPE,
283
  "linecontent_sacrebleu_score": linecontent_sacrebleu,
284
+ "linecontent_sacrebleu_withoutExplicitNumbers_score": linecontent_sacrebleu_withoutExplicitNumbers,
285
  "timestamps_SMAPE_difference_score": timestamps_difference_SMAPE,
286
  "timestamps_formatConsistency_score": timestamps_formatConsistency_absolute,
287
  "timestamps_monotinicity_score": timestamps_monotinicity_absolute