latuan committed on
Commit
9b42c11
·
1 Parent(s): bfea0bc
Files changed (3) hide show
  1. app.py +24 -5
  2. flagged/log.csv +0 -4
  3. requirements.txt +1 -2
app.py CHANGED
@@ -172,9 +172,25 @@ def time_to_seconds(time_str):
172
  seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
173
  return seconds
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  def generate_audio_with_pause(srt_file_path):
176
  subtitles = read_srt(srt_file_path)
177
  audio_clips = []
 
178
 
179
  for i, (start_time, end_time, text) in enumerate(subtitles):
180
  # Generate initial audio
@@ -186,11 +202,14 @@ def generate_audio_with_pause(srt_file_path):
186
  current_duration = len(audio_data) / 16000
187
 
188
  # Adjust audio speed by speedup
189
- if desired_duration < current_duration:
190
- speedup_factor = current_duration / desired_duration
191
- audio_data = librosa.effects.time_stretch(y=audio_data, rate=speedup_factor)
192
- audio_data / np.max(np.abs(audio_data))
193
-
 
 
 
194
  audio_clips.append(audio_data)
195
 
196
  # Add pause
 
172
  seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
173
  return seconds
174
 
175
def numpy_to_audiosegment(numpy_array):
    """Wrap a mono, 16 kHz sample array in a pydub ``AudioSegment``.

    Args:
        numpy_array: One-dimensional array of audio samples. Integer arrays
            are passed through as PCM with a sample width equal to the
            dtype's item size. Floating-point arrays are converted to
            16-bit PCM first.

    Returns:
        An ``AudioSegment`` with ``frame_rate=16000`` and ``channels=1``.
    """
    samples = np.asarray(numpy_array)

    if np.issubdtype(samples.dtype, np.floating):
        # AudioSegment interprets the raw buffer as signed-integer PCM of
        # ``sample_width`` bytes. Passing float bytes straight through would
        # be reinterpreted as integers (noise), so convert to int16 first.
        # NOTE(review): assumes float samples are normalized to [-1.0, 1.0];
        # values outside that range are clipped -- confirm against the TTS output.
        samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    return AudioSegment(
        samples.tobytes(),
        frame_rate=16000,
        sample_width=samples.dtype.itemsize,
        channels=1,
    )
183
+
184
def audiosegment_to_numpy(audio_segment):
    """Return the samples of ``audio_segment`` as a NumPy array.

    The samples come from ``audio_segment.get_array_of_samples()`` and are
    copied into a new array; the dtype is whatever ``np.array`` infers from
    that sample container.
    """
    raw_samples = audio_segment.get_array_of_samples()
    return np.array(raw_samples)
186
+
187
def closest_speedup_factor(factor, allowed_factors):
    """Return the entry of ``allowed_factors`` nearest to ``factor``.

    Args:
        factor: The desired (raw) speed-up factor.
        allowed_factors: Iterable of permitted factors to snap to.

    Returns:
        The allowed factor with the smallest absolute distance to ``factor``.
        When two entries are equally close, the one that appears first in
        ``allowed_factors`` wins.

    Raises:
        ValueError: If ``allowed_factors`` is empty.
    """
    candidates = list(allowed_factors)
    if not candidates:
        # Fail with an explicit message instead of min()'s generic
        # "arg is an empty sequence" error.
        raise ValueError("allowed_factors must contain at least one speed-up factor")
    return min(candidates, key=lambda candidate: abs(candidate - factor))
189
+
190
  def generate_audio_with_pause(srt_file_path):
191
  subtitles = read_srt(srt_file_path)
192
  audio_clips = []
193
+ allowed_factors = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
194
 
195
  for i, (start_time, end_time, text) in enumerate(subtitles):
196
  # Generate initial audio
 
202
  current_duration = len(audio_data) / 16000
203
 
204
  # Adjust audio speed by speedup
205
+ audio_segment = numpy_to_audiosegment(audio_data)
206
+ if current_duration > desired_duration:
207
+ raw_speedup_factor = current_duration / desired_duration
208
+ speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
209
+ audio_segment = audio_segment.speedup(playback_speed=speedup_factor)
210
+
211
+ # Convert back to numpy array
212
+ audio_data = audiosegment_to_numpy(audio_segment)
213
  audio_clips.append(audio_data)
214
 
215
  # Add pause
flagged/log.csv DELETED
@@ -1,4 +0,0 @@
1
- name,output,flag,username,timestamp
2
- asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:15.746931
3
- asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:18.666674
4
- asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:27.597313
 
 
 
 
 
requirements.txt CHANGED
@@ -9,5 +9,4 @@ librosa==0.10.0
9
  pydub==0.25.1
10
  speechbrain==0.5.16
11
  moviepy
12
- IPython
13
- librosa
 
9
  pydub==0.25.1
10
  speechbrain==0.5.16
11
  moviepy
12
+ IPython