Macrodove committed on
Commit
105baa8
·
1 Parent(s): 325bf75

Former-commit-id: fde89b18c3b8a9bbab04e50cada7f31fd582da86

Files changed (1) hide show
  1. evaluation/alignment.py +11 -8
evaluation/alignment.py CHANGED
@@ -2,6 +2,7 @@ import sys
2
  import numpy as np
3
  sys.path.append('../src')
4
  from srt_util.srt import SrtScript
 
5
 
6
 
7
  # Helper method
@@ -30,6 +31,7 @@ def procedure(anchor, subsec, S_arr, subidx):
30
  # Output: aligned array of SRTsegment corresponding to path1 path2
31
  # Note: Modify comment with .source_text to get output array with string only
32
  def alignment(pred_path, gt_path):
 
33
  pred = SrtScript.parse_from_srt_file(pred_path).segments
34
  gt = SrtScript.parse_from_srt_file(gt_path).segments
35
  pred_arr, gt_arr = [], []
@@ -43,14 +45,14 @@ def alignment(pred_path, gt_path):
43
  if not ps:
44
  # If ps runs out, align gs segment with filler one by one
45
  gt_arr.append(gs)#.source_text
46
- pred_arr.append('')
47
  idx_t += 1
48
  continue
49
 
50
  if not gs:
51
  # If gs runs out, align ps segment with filler one by one
52
  pred_arr.append(ps)#.source_text
53
- gt_arr.append('')
54
  idx_p += 1
55
  continue
56
 
@@ -62,7 +64,7 @@ def alignment(pred_path, gt_path):
62
  # Detect segment with no overlap
63
  if ps.end < gs.start:
64
  pred_arr.append(ps)#.source_text
65
- gt_arr.append('') # append filler
66
  idx_t -= 1 # reset ground truth index
67
  else:
68
  gt_arr.append(gs)#.source_text
@@ -70,13 +72,13 @@ def alignment(pred_path, gt_path):
70
  pred_arr.append(ps)#.source_text
71
  idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
72
  else: # filler pairing
73
- pred_arr.append('')
74
  idx_p -= 1
75
  else:
76
  # same overlap checking procedure
77
  if gs.end < ps.start:
78
  gt_arr.append(gs)#.source_text
79
- pred_arr.append('') # filler
80
  idx_p -= 1 # reset
81
  else:
82
  pred_arr.append(ps)#.source_text
@@ -84,13 +86,14 @@ def alignment(pred_path, gt_path):
84
  gt_arr.append(gs)#.source_text
85
  idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
86
  else: # filler pairing
87
- gt_arr.append('')
88
  idx_t -= 1
89
 
90
  idx_p += 1
91
  idx_t += 1
92
- #print(gt_arr)
 
93
  return zip(pred_arr, gt_arr)
94
 
95
  # Test Case
96
- #alignment('../results/...PATH1.../FILE.srt', '../results/PATH2/FILE.srt')
 
2
  import numpy as np
3
  sys.path.append('../src')
4
  from srt_util.srt import SrtScript
5
+ from srt_util.srt import SrtSegment
6
 
7
 
8
  # Helper method
 
31
  # Output: aligned array of SRTsegment corresponding to path1 path2
32
  # Note: Modify comment with .source_text to get output array with string only
33
  def alignment(pred_path, gt_path):
34
+ empt = SrtSegment([0,'00:00:00,000 --> 00:00:00,000','','',''])
35
  pred = SrtScript.parse_from_srt_file(pred_path).segments
36
  gt = SrtScript.parse_from_srt_file(gt_path).segments
37
  pred_arr, gt_arr = [], []
 
45
  if not ps:
46
  # If ps runs out, align gs segment with filler one by one
47
  gt_arr.append(gs)#.source_text
48
+ pred_arr.append(empt)
49
  idx_t += 1
50
  continue
51
 
52
  if not gs:
53
  # If gs runs out, align ps segment with filler one by one
54
  pred_arr.append(ps)#.source_text
55
+ gt_arr.append(empt)
56
  idx_p += 1
57
  continue
58
 
 
64
  # Detect segment with no overlap
65
  if ps.end < gs.start:
66
  pred_arr.append(ps)#.source_text
67
+ gt_arr.append(empt) # append filler
68
  idx_t -= 1 # reset ground truth index
69
  else:
70
  gt_arr.append(gs)#.source_text
 
72
  pred_arr.append(ps)#.source_text
73
  idx_p = procedure(gs, pred, pred_arr, idx_p + 1)
74
  else: # filler pairing
75
+ pred_arr.append(empt)
76
  idx_p -= 1
77
  else:
78
  # same overlap checking procedure
79
  if gs.end < ps.start:
80
  gt_arr.append(gs)#.source_text
81
+ pred_arr.append(empt) # filler
82
  idx_p -= 1 # reset
83
  else:
84
  pred_arr.append(ps)#.source_text
 
86
  gt_arr.append(gs)#.source_text
87
  idx_t = procedure(ps, gt, gt_arr, idx_t + 1)
88
  else: # filler pairing
89
+ gt_arr.append(empt)
90
  idx_t -= 1
91
 
92
  idx_p += 1
93
  idx_t += 1
94
+ #for a in pred_arr:
95
+ # print(a.source_text)
96
  return zip(pred_arr, gt_arr)
97
 
98
  # Test Case
99
+ #alignment('test_translation_zh.srt', 'test_translation_bi.srt')