Spaces:

svenwey
/

logmetric

Sleeping

svenwey committed on Jun 13, 2024

Commit

3484514

1 Parent(s): 4843304

precompile timestamp_regex

Files changed (1) hide show

logscoremetric.py CHANGED Viewed

@@ -68,6 +68,7 @@ class LogScoreMetric(evaluate.Metric):
     # Constant regex to get timestrings
     timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
     sacrebleu = evaluate.load("sacrebleu")
     def _info(self):
@@ -101,8 +102,8 @@ class LogScoreMetric(evaluate.Metric):
         pred = pred.strip(' \t\n\r')
         # Find all timestrings in the log
-        pred_timestrings = re.findall(self.timestamp_regex, pred, re.MULTILINE)
-        ref_timestrings = re.findall(self.timestamp_regex, ref, re.MULTILINE)
         #Check if there is the correct amount of timestrings in the prediction
         if(len(pred_timestrings) != len(ref_timestrings)):
@@ -145,8 +146,8 @@ class LogScoreMetric(evaluate.Metric):
         t_before = time.perf_counter()
         timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
-        predictions_without_timestamps = [re.sub(self.timestamp_regex, '', p, flags=re.MULTILINE) for p in predictions]
-        references_without_timestamps = [re.sub(self.timestamp_regex, '', r, flags=re.MULTILINE) for r in references]
         # Sacrebleu score on logs without timestamps
         sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)

     # Constant regex to get timestrings
     timestamp_regex = r'(^\d{4}[-/.]\d{2}[-/.]\d{2}(?:[ T]\d{2}[:]\d{2}(?:[:]\d{2}(?:[.,]\d+)?)?(?:Z|[+-]\d{2}[:]\d{2})?)?)'
+    timestamp_pattern = re.compile(timestamp_regex, re.MULTILINE)
     sacrebleu = evaluate.load("sacrebleu")
     def _info(self):
         pred = pred.strip(' \t\n\r')
         # Find all timestrings in the log
+        pred_timestrings = self.timestamp_pattern.findall(pred)
+        ref_timestrings = self.timestamp_pattern.findall(ref)
         #Check if there is the correct amount of timestrings in the prediction
         if(len(pred_timestrings) != len(ref_timestrings)):
         t_before = time.perf_counter()
         timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
+        predictions_without_timestamps = [self.timestamp_pattern.sub('', p) for p in predictions]
+        references_without_timestamps = [self.timestamp_pattern.sub('', r) for r in references]
         # Sacrebleu score on logs without timestamps
         sb_results = self.sacrebleu.compute(predictions=predictions_without_timestamps, references=references_without_timestamps)