precompile timestamp_regex
Browse files
- logscoremetric.py +5 -4
logscoremetric.py
CHANGED
@@ -68,6 +68,7 @@ class LogScoreMetric(evaluate.Metric):
|
|
68 |
|
69 |
# Constant regex to get timestrings
|
70 |
timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
|
|
|
71 |
sacrebleu = evaluate.load("sacrebleu")
|
72 |
|
73 |
def _info(self):
|
@@ -101,8 +102,8 @@ class LogScoreMetric(evaluate.Metric):
|
|
101 |
pred = pred.strip(' \t\n\r')
|
102 |
|
103 |
# Find all timestrings in the log
|
104 |
-
pred_timestrings =
|
105 |
-
ref_timestrings =
|
106 |
|
107 |
#Check if there is the correct amount of timestrings in the prediction
|
108 |
if(len(pred_timestrings) != len(ref_timestrings)):
|
@@ -145,8 +146,8 @@ class LogScoreMetric(evaluate.Metric):
|
|
145 |
t_before = time.perf_counter()
|
146 |
|
147 |
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
148 |
-
predictions_without_timestamps = [
|
149 |
-
references_without_timestamps = [
|
150 |
|
151 |
# Sacrebleu score on logs without timestamps
|
152 |
sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)
|
|
|
68 |
|
69 |
# Constant regex to get timestrings
|
70 |
timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
|
71 |
+
timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
|
72 |
sacrebleu = evaluate.load("sacrebleu")
|
73 |
|
74 |
def _info(self):
|
|
|
102 |
pred = pred.strip(' \t\n\r')
|
103 |
|
104 |
# Find all timestrings in the log
|
105 |
+
pred_timestrings = self.timestamp_pattern.findall(pred)
|
106 |
+
ref_timestrings = self.timestamp_pattern.findall(ref)
|
107 |
|
108 |
#Check if there is the correct amount of timestrings in the prediction
|
109 |
if(len(pred_timestrings) != len(ref_timestrings)):
|
|
|
146 |
t_before = time.perf_counter()
|
147 |
|
148 |
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
149 |
+
predictions_without_timestamps = [self.timestamp_pattern.sub('', p) for p in predictions]
|
150 |
+
references_without_timestamps = [self.timestamp_pattern.sub('', r) for r in references]
|
151 |
|
152 |
# Sacrebleu score on logs without timestamps
|
153 |
sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)
|