Spaces:
Sleeping
Sleeping
Macrodove
committed on
Commit
·
105baa8
1
Parent(s):
325bf75
bug fixed
Browse files
Former-commit-id: fde89b18c3b8a9bbab04e50cada7f31fd582da86
- evaluation/alignment.py +11 -8
evaluation/alignment.py
CHANGED
@@ -2,6 +2,7 @@ import sys
|
|
2 |
import numpy as np
|
3 |
sys.path.append('../src')
|
4 |
from srt_util.srt import SrtScript
|
|
|
5 |
|
6 |
|
7 |
# Helper method
|
@@ -30,6 +31,7 @@ def procedure(anchor, subsec, S_arr, subidx):
|
|
30 |
# Output: aligned array of SRTsegment corresponding to path1 path2
|
31 |
# Note: Modify comment with .source_text to get output array with string only
|
32 |
def alignment(pred_path, gt_path):
|
|
|
33 |
pred = SrtScript.parse_from_srt_file(pred_path).segments
|
34 |
gt = SrtScript.parse_from_srt_file(gt_path).segments
|
35 |
pred_arr, gt_arr = [], []
|
@@ -43,14 +45,14 @@ def alignment(pred_path, gt_path):
|
|
43 |
if not ps:
|
44 |
# If ps runs out, align gs segment with filler one by one
|
45 |
gt_arr.append(gs)#.source_text
|
46 |
-
pred_arr.append(
|
47 |
idx_t += 1
|
48 |
continue
|
49 |
|
50 |
if not gs:
|
51 |
# If gs runs out, align ps segment with filler one by one
|
52 |
pred_arr.append(ps)#.source_text
|
53 |
-
gt_arr.append(
|
54 |
idx_p += 1
|
55 |
continue
|
56 |
|
@@ -62,7 +64,7 @@ def alignment(pred_path, gt_path):
|
|
62 |
# Detect segment with no overlap
|
63 |
if ps.end < gs.start:
|
64 |
pred_arr.append(ps)#.source_text
|
65 |
-
gt_arr.append(
|
66 |
idx_t -= 1 # reset ground truth index
|
67 |
else:
|
68 |
gt_arr.append(gs)#.source_text
|
@@ -70,13 +72,13 @@ def alignment(pred_path, gt_path):
|
|
70 |
pred_arr.append(ps)#.source_text
|
71 |
idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
|
72 |
else: # filler pairing
|
73 |
-
pred_arr.append(
|
74 |
idx_p -= 1
|
75 |
else:
|
76 |
# same overlap checking procedure
|
77 |
if gs.end < ps.start:
|
78 |
gt_arr.append(gs)#.source_text
|
79 |
-
pred_arr.append(
|
80 |
idx_p -= 1 # reset
|
81 |
else:
|
82 |
pred_arr.append(ps)#.source_text
|
@@ -84,13 +86,14 @@ def alignment(pred_path, gt_path):
|
|
84 |
gt_arr.append(gs)#.source_text
|
85 |
idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
|
86 |
else: # filler pairing
|
87 |
-
gt_arr.append(
|
88 |
idx_t -= 1
|
89 |
|
90 |
idx_p += 1
|
91 |
idx_t += 1
|
92 |
-
#
|
|
|
93 |
return zip(pred_arr, gt_arr)
|
94 |
|
95 |
# Test Case
|
96 |
-
#alignment('
|
|
|
2 |
import numpy as np
|
3 |
sys.path.append('../src')
|
4 |
from srt_util.srt import SrtScript
|
5 |
+
from srt_util.srt import SrtSegment
|
6 |
|
7 |
|
8 |
# Helper method
|
|
|
31 |
# Output: aligned array of SRTsegment corresponding to path1 path2
|
32 |
# Note: Modify comment with .source_text to get output array with string only
|
33 |
def alignment(pred_path, gt_path):
|
34 |
+
empt = SrtSegment([0,'00:00:00,000 --> 00:00:00,000','','',''])
|
35 |
pred = SrtScript.parse_from_srt_file(pred_path).segments
|
36 |
gt = SrtScript.parse_from_srt_file(gt_path).segments
|
37 |
pred_arr, gt_arr = [], []
|
|
|
45 |
if not ps:
|
46 |
# If ps runs out, align gs segment with filler one by one
|
47 |
gt_arr.append(gs)#.source_text
|
48 |
+
pred_arr.append(empt)
|
49 |
idx_t += 1
|
50 |
continue
|
51 |
|
52 |
if not gs:
|
53 |
# If gs runs out, align ps segment with filler one by one
|
54 |
pred_arr.append(ps)#.source_text
|
55 |
+
gt_arr.append(empt)
|
56 |
idx_p += 1
|
57 |
continue
|
58 |
|
|
|
64 |
# Detect segment with no overlap
|
65 |
if ps.end < gs.start:
|
66 |
pred_arr.append(ps)#.source_text
|
67 |
+
gt_arr.append(empt) # append filler
|
68 |
idx_t -= 1 # reset ground truth index
|
69 |
else:
|
70 |
gt_arr.append(gs)#.source_text
|
|
|
72 |
pred_arr.append(ps)#.source_text
|
73 |
idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
|
74 |
else: # filler pairing
|
75 |
+
pred_arr.append(empt)
|
76 |
idx_p -= 1
|
77 |
else:
|
78 |
# same overlap checking procedure
|
79 |
if gs.end < ps.start:
|
80 |
gt_arr.append(gs)#.source_text
|
81 |
+
pred_arr.append(empt) # filler
|
82 |
idx_p -= 1 # reset
|
83 |
else:
|
84 |
pred_arr.append(ps)#.source_text
|
|
|
86 |
gt_arr.append(gs)#.source_text
|
87 |
idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
|
88 |
else: # filler pairing
|
89 |
+
gt_arr.append(empt)
|
90 |
idx_t -= 1
|
91 |
|
92 |
idx_p += 1
|
93 |
idx_t += 1
|
94 |
+
#for a in pred_arr:
|
95 |
+
# print(a.source_text)
|
96 |
return zip(pred_arr, gt_arr)
|
97 |
|
98 |
# Test Case
|
99 |
+
#alignment('test_translation_zh.srt', 'test_translation_bi.srt')
|