LAP-DEV committed on
Commit
b9fa8bc
·
verified ·
1 Parent(s): 3e596a4

Update modules/utils/subtitle_manager.py

Browse files
Files changed (1) hide show
  1. modules/utils/subtitle_manager.py +51 -9
modules/utils/subtitle_manager.py CHANGED
@@ -80,22 +80,64 @@ def get_vtt(segments):
80
  return output
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def get_txt(segments):
 
84
  output = ""
 
 
85
  for i, segment in enumerate(segments):
86
- if segment['text'].startswith(' '):
87
- segment['text'] = segment['text'][1:]
88
- #output += f"{segment['text']}\n"
89
- output += f"{timeformat_txt(segment['start'])}\t{segment['text']}\n"
90
- return output
 
 
 
 
 
 
 
 
 
91
 
92
  def get_plaintext(segments):
 
93
  output = ""
 
 
94
  for i, segment in enumerate(segments):
95
- if segment['text'].startswith(' '):
96
- segment['text'] = segment['text'][1:]
97
- output += f"{segment['text']}\n"
98
- return output
 
 
 
 
 
 
 
 
 
 
99
 
100
  def parse_srt(file_path):
101
  """Reads SRT file and returns as dict"""
 
80
  return output
81
 
82
 
83
+ #def get_txt(segments):
84
+ # output = ""
85
+ # for i, segment in enumerate(segments):
86
+ # if segment['text'].startswith(' '):
87
+ # segment['text'] = segment['text'][1:]
88
+ # #output += f"{segment['text']}\n"
89
+ # output += f"{timeformat_txt(segment['start'])}\t{segment['text']}\n"
90
+ # return output
91
+
92
+ #def get_plaintext(segments):
93
+ # output = ""
94
+ # for i, segment in enumerate(segments):
95
+ # if segment['text'].startswith(' '):
96
+ # segment['text'] = segment['text'][1:]
97
+ # output += f"{segment['text']}\n"
98
+ # return output
99
+
100
  def get_txt(segments):
101
+ bDiarization = False
102
  output = ""
103
+
104
+ # Check if speakers are identified
105
  for i, segment in enumerate(segments):
106
+ if re.search(r'SPEAKER [0-9][0-9]: ',segment['text']) != None:
107
+ bDiarization = True
108
+ break
109
+
110
+ for i, segment in enumerate(segments):
111
+ if bDiarization:
112
+ speaker_id = ((segment['text']).split(":", 1)[0]).strip()
113
+ speaker_text = ((segment['text']).split(":", 1)[1]).strip()
114
+ output += f"{timeformat_txt(segment['start'])}\t{speaker_id}\t{speaker_text}\n"
115
+ else:
116
+ speaker_text = (segment['text']).strip()
117
+ output += f"{timeformat_txt(segment['start'])}\t{speaker_text}\n"
118
+
119
+ return output.rstrip("\n")
120
 
121
  def get_plaintext(segments):
122
+ bDiarization = False
123
  output = ""
124
+
125
+ # Check if speakers are identified
126
  for i, segment in enumerate(segments):
127
+ if re.search(r'SPEAKER [0-9][0-9]: ',segment['text']) != None:
128
+ bDiarization = True
129
+ break
130
+
131
+ for i, segment in enumerate(segments):
132
+ if bDiarization:
133
+ speaker_id = ((segment['text']).split(":", 1)[0]).strip()
134
+ speaker_text = ((segment['text']).split(":", 1)[1]).strip()
135
+ output += f"{timeformat_txt({speaker_id}\t{speaker_text}\n"
136
+ else:
137
+ speaker_text = (segment['text']).strip()
138
+ output += f"{speaker_text}\n"
139
+
140
+ return output.rstrip("\n")
141
 
142
  def parse_srt(file_path):
143
  """Reads SRT file and returns as dict"""