Use Jaccard similarity again for log-message comparison
Browse files- logmetric.py +8 -3
logmetric.py
CHANGED
@@ -111,10 +111,15 @@ class LogMetric(evaluate.Metric):
|
|
111 |
|
112 |
return 1 - (abs(s1len - s2len) / max(s1len, s2len))
|
113 |
|
114 |
-
#
|
115 |
def get_overall_similarity(self, sentence1, sentence2):
|
116 |
-
|
|
|
117 |
|
|
|
|
|
|
|
|
|
118 |
|
119 |
def getLogMetric(self, pred : str, ref : str):
|
120 |
ref = ref.strip(' \t\n\r')
|
@@ -212,7 +217,7 @@ class LogMetric(evaluate.Metric):
|
|
212 |
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
213 |
t_after_logmetric = time.perf_counter()
|
214 |
|
215 |
-
logmetric_duration = f"
|
216 |
|
217 |
return {
|
218 |
"score": timestamp_score,
|
|
|
111 |
|
112 |
return 1 - (abs(s1len - s2len) / max(s1len, s2len))
|
113 |
|
114 |
+
# Combine the Jaccard similarity of the two sentences' token sets (weight 0.7) with a length score (weight 0.3), scaled to 0-100
|
115 |
def get_overall_similarity(self, sentence1, sentence2):
|
116 |
+
s1split = sentence1.split()
|
117 |
+
s2split = sentence2.split()
|
118 |
|
119 |
+
jaccard_score = self.get_jaccard_similarity(set(s1split), set(s2split))
|
120 |
+
length_score = self.get_length_score(s1split, s2split)
|
121 |
+
|
122 |
+
return (jaccard_score * 0.7 + length_score * 0.3) * 100.0
|
123 |
|
124 |
def getLogMetric(self, pred : str, ref : str):
|
125 |
ref = ref.strip(' \t\n\r')
|
|
|
217 |
timestamp_score = np.mean([self.getLogMetric(p,r) for p,r in zip(predictions,references)])
|
218 |
t_after_logmetric = time.perf_counter()
|
219 |
|
220 |
+
logmetric_duration = f"{t_after_logmetric - t_before_logmetric:0.10f}"
|
221 |
|
222 |
return {
|
223 |
"score": timestamp_score,
|