Spaces:

latuan
/

SRT-to-Audio

Runtime error

latuan committed on Aug 21, 2024

Commit

9b42c11

1 Parent(s): bfea0bc

ver 1.9.5

Files changed (3) hide show

app.py CHANGED Viewed

@@ -172,9 +172,25 @@ def time_to_seconds(time_str):
     seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
     return seconds
 def generate_audio_with_pause(srt_file_path):
     subtitles = read_srt(srt_file_path)
     audio_clips = []
     for i, (start_time, end_time, text) in enumerate(subtitles):
         # Generate initial audio
@@ -186,11 +202,14 @@ def generate_audio_with_pause(srt_file_path):
         current_duration = len(audio_data) / 16000
         # Adjust audio speed by speedup
-        if desired_duration < current_duration:
-            speedup_factor = current_duration / desired_duration
-            audio_data = librosa.effects.time_stretch(y=audio_data, rate=speedup_factor)
-            audio_data / np.max(np.abs(audio_data))
         audio_clips.append(audio_data)
         # Add pause

     seconds = int(h) * 3600 + int(m) * 60 + float(s.replace(',', '.'))
     return seconds
+def numpy_to_audiosegment(numpy_array):
+    audio_segment = AudioSegment(
+        numpy_array.tobytes(),
+        frame_rate=16000,
+        sample_width=numpy_array.dtype.itemsize,
+        channels=1
+    )
+    return audio_segment
+def audiosegment_to_numpy(audio_segment):
+    return np.array(audio_segment.get_array_of_samples())
+def closest_speedup_factor(factor, allowed_factors):
+    return min(allowed_factors, key=lambda x: abs(x - factor))
 def generate_audio_with_pause(srt_file_path):
     subtitles = read_srt(srt_file_path)
     audio_clips = []
+    allowed_factors = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]
     for i, (start_time, end_time, text) in enumerate(subtitles):
         # Generate initial audio
         current_duration = len(audio_data) / 16000
         # Adjust audio speed by speedup
+        audio_segment = numpy_to_audiosegment(audio_data)
+        if current_duration > desired_duration:
+            raw_speedup_factor = current_duration / desired_duration
+            speedup_factor = closest_speedup_factor(raw_speedup_factor, allowed_factors)
+            audio_segment = audio_segment.speedup(playback_speed=speedup_factor)
+        # Convert back to numpy array
+        audio_data = audiosegment_to_numpy(audio_segment)
         audio_clips.append(audio_data)
         # Add pause

flagged/log.csv DELETED Viewed

@@ -1,4 +0,0 @@
-name,output,flag,username,timestamp
-asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:15.746931
-asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:18.666674
-asdasdasdasdasd,Hello asdasdasdasdasd!!,,,2024-08-21 09:52:27.597313

requirements.txt CHANGED Viewed

@@ -9,5 +9,4 @@ librosa==0.10.0
 pydub==0.25.1
 speechbrain==0.5.16
 moviepy
-IPython
-librosa

 pydub==0.25.1
 speechbrain==0.5.16
 moviepy
+IPython