TextToSpeech

Sleeping

App Files Files Community

hivecorp committed on Oct 20, 2024

Commit

fbe2197

verified ·

1 Parent(s): 34ebd65

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -9

app.py CHANGED Viewed

@@ -27,25 +27,26 @@ async def text_to_speech(text, voice, rate, pitch):
     return tmp_path, None
 # Generate SRT file based on user preferences
-def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph):
     with open(srt_path, 'w', encoding='utf-8') as srt_file:
-        segment_duration = audio_duration / (len(words) // lines_per_paragraph)  # Average duration for each segment
-        current_time = 0
         for i in range(0, len(words), words_per_line * lines_per_paragraph):  # Every segment according to specified preferences
             segment_words = words[i:i + (words_per_line * lines_per_paragraph)]
             lines = [segment_words[j:j + words_per_line] for j in range(0, len(segment_words), words_per_line)]
             lines = [' '.join(line) for line in lines]
             start_time = current_time
-            end_time = start_time + segment_duration
             start_time_str = format_srt_time(start_time)
             end_time_str = format_srt_time(end_time)
             srt_file.write(f"{i // (words_per_line * lines_per_paragraph) + 1}\n{start_time_str} --> {end_time_str}\n")
             srt_file.write('\n'.join(lines) + '\n\n')
-            current_time += segment_duration  # Update current time for the next segment
     return srt_path
@@ -71,7 +72,7 @@ async def text_to_audio_and_srt(text, voice, rate, pitch, words_per_line, lines_
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"
     words = text.split()
-    generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph)
     return audio_path, srt_path, None
@@ -100,8 +101,8 @@ async def create_demo():
                 rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
                 pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
-                words_per_line = gr.Slider(minimum=8, maximum=12, value=10, label="Words per Line", step=1)
-                lines_per_paragraph = gr.Slider(minimum=1, maximum=4, value=2, label="Lines per Paragraph", step=1)
                 generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")

     return tmp_path, None
 # Generate SRT file based on user preferences
def generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph, speech_rate):
    """Write an SRT subtitle file for *words* and return its path.

    The words are grouped into cues of ``words_per_line * lines_per_paragraph``
    words, each cue being laid out as lines of at most ``words_per_line`` words.
    Timing is allocated per word, so every cue lasts in proportion to the
    number of words it shows and the final cue ends exactly at the end of the
    (rate-adjusted) duration.

    Args:
        words: Sequence of word strings, in spoken order.
        audio_duration: Length of the audio the subtitles accompany
            (presumably seconds, as passed on to ``format_srt_time``).
        srt_path: Destination path of the ``.srt`` file; it is overwritten.
        words_per_line: Maximum number of words on one subtitle line.
        lines_per_paragraph: Maximum number of lines in one cue.
        speech_rate: Rate adjustment in percent relative to normal speed
            (0 means unchanged; positive is faster, so cues get shorter).

    Returns:
        ``srt_path``, unchanged.
    """
    cue_size = words_per_line * lines_per_paragraph

    # A rate of 0 must leave the timeline untouched; +r% speech shortens it by
    # a factor of 100 / (100 + r).
    # NOTE(review): this assumes audio_duration describes the clip at normal
    # speed. If the caller measures the already rate-adjusted audio, this
    # scaling should be dropped - confirm against the caller.
    total_duration = audio_duration * (100 / (100 + speech_rate))

    # Guard the empty transcript: no cues are written, so the value is unused.
    seconds_per_word = total_duration / len(words) if words else 0

    with open(srt_path, 'w', encoding='utf-8') as srt_file:
        for cue_number, first in enumerate(range(0, len(words), cue_size), start=1):
            cue_words = words[first:first + cue_size]
            lines = [
                ' '.join(cue_words[j:j + words_per_line])
                for j in range(0, len(cue_words), words_per_line)
            ]
            # Derive both stamps from word offsets instead of accumulating a
            # running float, so rounding error cannot drift across cues.
            start_time_str = format_srt_time(first * seconds_per_word)
            end_time_str = format_srt_time((first + len(cue_words)) * seconds_per_word)
            srt_file.write(f"{cue_number}\n{start_time_str} --> {end_time_str}\n")
            srt_file.write('\n'.join(lines) + '\n\n')
    return srt_path
     base_name = os.path.splitext(audio_path)[0]
     srt_path = f"{base_name}_subtitle.srt"
     words = text.split()
+    generate_srt(words, audio_duration, srt_path, words_per_line, lines_per_paragraph, rate)
     return audio_path, srt_path, None
                 rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Rate Adjustment (%)", step=1)
                 pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
+                words_per_line = gr.Slider(minimum=3, maximum=8, value=5, label="Words per Line", step=1)
+                lines_per_paragraph = gr.Slider(minimum=1, maximum=5, value=2, label="Lines per Paragraph", step=1)
                 generate_button = gr.Button("Generate Audio and Subtitles", variant="primary")