Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import scipy.io.wavfile
|
4 |
-
from io import BytesIO
|
5 |
import os
|
6 |
import datetime
|
7 |
-
import uuid
|
8 |
import shutil
|
9 |
import soundfile as sf
|
10 |
import nltk
|
|
|
|
|
11 |
nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
|
12 |
from nltk import sent_tokenize
|
13 |
|
@@ -34,15 +34,17 @@ def generate_audio(input_text):
|
|
34 |
|
35 |
for i, sentence in enumerate(sentences):
|
36 |
# Perform TTS inference for each sentence
|
|
|
37 |
speech = synthesiser(sentence)
|
38 |
|
39 |
# Extract the audio data and sampling rate from the pipeline output
|
40 |
-
audio_data = speech["audio"]
|
41 |
sample_rate = speech["sampling_rate"]
|
42 |
|
43 |
# Save each sentence as a separate audio file
|
44 |
wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
|
45 |
-
|
|
|
46 |
audio_files.append(wav_path)
|
47 |
|
48 |
# Combine all audio files into one file
|
@@ -63,6 +65,7 @@ def combine_wav(source_dir, stamp):
|
|
63 |
sr = None
|
64 |
for file in wav_files:
|
65 |
file_path = os.path.join(source_dir, file)
|
|
|
66 |
data, sample_rate = sf.read(file_path)
|
67 |
if sr is None:
|
68 |
sr = sample_rate # Set the sample rate based on the first file
|
@@ -70,7 +73,7 @@ def combine_wav(source_dir, stamp):
|
|
70 |
|
71 |
# Save the combined audio to a new WAV file
|
72 |
combined_file_path = f"{stamp}_combined.wav"
|
73 |
-
sf.write(combined_file_path, combined_data, sr)
|
74 |
|
75 |
# Clean up temporary files
|
76 |
shutil.rmtree(source_dir)
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import scipy.io.wavfile
|
|
|
4 |
import os
|
5 |
import datetime
|
|
|
6 |
import shutil
|
7 |
import soundfile as sf
|
8 |
import nltk
|
9 |
+
import numpy as np # Add numpy to handle audio data
|
10 |
+
|
11 |
nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
|
12 |
from nltk import sent_tokenize
|
13 |
|
|
|
34 |
|
35 |
for i, sentence in enumerate(sentences):
|
36 |
# Perform TTS inference for each sentence
|
37 |
+
print(f"Processing sentence {i+1}: {sentence}")
|
38 |
speech = synthesiser(sentence)
|
39 |
|
40 |
# Extract the audio data and sampling rate from the pipeline output
|
41 |
+
audio_data = np.array(speech["audio"]) # Ensure the data is a NumPy array
|
42 |
sample_rate = speech["sampling_rate"]
|
43 |
|
44 |
# Save each sentence as a separate audio file
|
45 |
wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
|
46 |
+
print(f"Saving audio to {wav_path}")
|
47 |
+
scipy.io.wavfile.write(wav_path, rate=sample_rate, data=audio_data.astype(np.int16)) # Ensure 16-bit format for WAV
|
48 |
audio_files.append(wav_path)
|
49 |
|
50 |
# Combine all audio files into one file
|
|
|
65 |
sr = None
|
66 |
for file in wav_files:
|
67 |
file_path = os.path.join(source_dir, file)
|
68 |
+
print(f"Combining {file_path}")
|
69 |
data, sample_rate = sf.read(file_path)
|
70 |
if sr is None:
|
71 |
sr = sample_rate # Set the sample rate based on the first file
|
|
|
73 |
|
74 |
# Save the combined audio to a new WAV file
|
75 |
combined_file_path = f"{stamp}_combined.wav"
|
76 |
+
sf.write(combined_file_path, np.array(combined_data), sr)
|
77 |
|
78 |
# Clean up temporary files
|
79 |
shutil.rmtree(source_dir)
|