svenwey committed on
Commit 796bd91 · 1 Parent(s): 41ade47

various cleanups

Files changed (1)
  1. logmetric.py +11 -26
logmetric.py CHANGED
@@ -102,10 +102,8 @@ class LogMetric(evaluate.Metric):
         ref = ref.strip(' \t\n\r')
         pred = pred.strip(' \t\n\r')
 
-        # Find all timestrings in the log
-        # pred_timestrings = self.timestamp_pattern.findall(pred)
+        # Split log on timestamps
         pred_split_log = self.timestamp_pattern.split(pred)
-        # ref_timestrings = self.timestamp_pattern.findall(ref)
         ref_split_log = self.timestamp_pattern.split(ref)
 
         # This should alwas hold (safety feature)
@@ -126,12 +124,6 @@ class LogMetric(evaluate.Metric):
         # The number of logentries of the reference/prediction which has more/less entries/timestamps
         max_logentries = max(len(pred_logentries), len(ref_logentries))
         min_logentries = min(len(pred_logentries), len(ref_logentries))
-        # # print("pred_logentries: ", pred_logentries)
-        # # print("ref_logentries: ", ref_logentries)
-
-
-        # # print("amount of timestrings: max:{}, min:{}".format(max_logentries, min_logentries))
-
 
         # Case there are no timestamps in reference and none in prediction
         # we can compute bleu directly from original prediction (ref will be empty, but we offload this to the bleu metric)
@@ -155,16 +147,12 @@ class LogMetric(evaluate.Metric):
         matchesPatternScore = 100.0
         monotonicallyIncreasingScore = 100.0
 
-        # An array to save score per logentry
-        logmessage_scores = []
-        # TODO: Idea to penalize too long/ short logs-> add the amount of(max_len - min_len) between timestamps times score 0 at the end
         # A variable to save the previous timestamp (as datetime obj) to check monotonicity
         prev_datetime = None
         # Convert matches to datetime objects
         # TODO TODO TODO fix this:
         for i in range(min_logentries):
-            ts, pred_lm = pred_logentries[i]
-            _, ref_lm = ref_logentries[i]
+            ts = pred_logentries[i][0]
             try:
                 # Check if the format matches with the format of the first timestamp
                 # TODO!! Check this later, maybe it is too restricting for training a llm
@@ -175,29 +163,26 @@ class LogMetric(evaluate.Metric):
                 prev_datetime = cur_datetime
 
                 # If one entry doesn't fulfill the matching pattern property or the monotinicity property, set to 0 for whole log
-                if (not matchesPattern):
-                    matchesPatternScore = 0.0
-                    # # print("{} doesn't match pattern {}, setting patternScore to 0".format(ts, pred_timestring_pattern))
-                if (not monotonicallyIncreasing):
-                    monotonicallyIncreasingScore = 0.0
-                    # # print("{} isn't monotonically increasing, setting monotonicallyIncreasingScore to 0".format(ts))
-
+                matchesPatternScore = 0.0 if (not matchesPattern) else matchesPatternScore
+                monotonicallyIncreasingScore = 0.0 if (not monotonicallyIncreasing) else monotonicallyIncreasingScore
+
 
             except Exception as e:
                 # e.g. date format not parsable by dateutil.parser
                 matchesPatternScore = 0.0
                 monotonicallyIncreasingScore = 0.0
 
-        local_score = sentencesimilarity_metric.compute(predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
-                                                        references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
-                                                        tokenize="char")["score"]
+        # We calculate the overall local score of all the log-entries (log-messages)
+        local_score = sentencesimilarity_metric.compute(
+            predictions=(list(map(lambda t: t[1], pred_logentries))[:min_logentries]),
+            references=(list(map(lambda t: t[1], ref_logentries))[:min_logentries]),
+            tokenize="char")["score"]
 
 
 
         # we aggregate the bleu scores where we weight the difference in logentries with a score of 0
         logmessage_aggregated_score = ((min_logentries / max_logentries) * local_score)
+        # return weighted overall score of all the different scores
        return 0.2 * monotonicallyIncreasingScore + 0.1 * matchesPatternScore + 0.7 * logmessage_aggregated_score
 
     def _compute(self, predictions, references):
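For context, a minimal, self-contained sketch of the (timestamp, message) pairing the changed lines rely on. The real self.timestamp_pattern and the construction of pred_logentries / ref_logentries live elsewhere in logmetric.py and are not part of this diff; the ISO-8601-style regex and the sample log below are only illustrative stand-ins.

import re

# Illustrative stand-in for self.timestamp_pattern. The capturing group is what
# makes re.split keep the timestamps in the result.
timestamp_pattern = re.compile(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})")

log = ("2024-01-01 10:00:00 service started\n"
       "2024-01-01 10:00:05 request handled\n")

# ['', '2024-01-01 10:00:00', ' service started\n',
#      '2024-01-01 10:00:05', ' request handled\n']
split_log = timestamp_pattern.split(log)

# Pair each timestamp with the message that follows it. This mirrors the
# (timestamp, message) tuples that pred_logentries / ref_logentries appear to
# hold, which is why the diff reads the timestamp as pred_logentries[i][0]
# and the message as t[1].
logentries = list(zip(split_log[1::2], split_log[2::2]))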
 
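And a back-of-the-envelope sketch of how the weights in the final return line combine, assuming (as the 100.0 defaults and the BLEU-style score mentioned in the comments suggest) that all three sub-scores live on a 0-100 scale. The numbers are made up for illustration.

# Hypothetical sub-scores on a 0-100 scale (illustrative values only).
monotonicallyIncreasingScore = 100.0  # timestamps never decreased
matchesPatternScore = 0.0             # at least one timestamp broke the expected format
local_score = 80.0                    # char-level similarity of the paired log messages

# Extra or missing log entries are weighted in as similarity 0 via min/max.
min_logentries, max_logentries = 4, 5
logmessage_aggregated_score = (min_logentries / max_logentries) * local_score  # 64.0

overall = (0.2 * monotonicallyIncreasingScore
           + 0.1 * matchesPatternScore
           + 0.7 * logmessage_aggregated_score)
# overall == 64.8: dominated by message similarity, with smaller weights on
# monotonicity (0.2) and timestamp-format consistency (0.1).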