Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,13 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import scipy.io.wavfile
|
4 |
-
from io import BytesIO
|
5 |
import os
|
6 |
import datetime
|
7 |
-
import uuid
|
8 |
import shutil
|
9 |
import soundfile as sf
|
10 |
import nltk
|
|
|
|
|
11 |
nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
|
12 |
from nltk import sent_tokenize
|
13 |
|
@@ -34,15 +34,17 @@ def generate_audio(input_text):
|
|
34 |
|
35 |
for i, sentence in enumerate(sentences):
|
36 |
# Perform TTS inference for each sentence
|
|
|
37 |
speech = synthesiser(sentence)
|
38 |
|
39 |
# Extract the audio data and sampling rate from the pipeline output
|
40 |
-
audio_data = speech["audio"]
|
41 |
sample_rate = speech["sampling_rate"]
|
42 |
|
43 |
# Save each sentence as a separate audio file
|
44 |
wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
|
45 |
-
|
|
|
46 |
audio_files.append(wav_path)
|
47 |
|
48 |
# Combine all audio files into one file
|
@@ -63,6 +65,7 @@ def combine_wav(source_dir, stamp):
|
|
63 |
sr = None
|
64 |
for file in wav_files:
|
65 |
file_path = os.path.join(source_dir, file)
|
|
|
66 |
data, sample_rate = sf.read(file_path)
|
67 |
if sr is None:
|
68 |
sr = sample_rate # Set the sample rate based on the first file
|
@@ -70,7 +73,7 @@ def combine_wav(source_dir, stamp):
|
|
70 |
|
71 |
# Save the combined audio to a new WAV file
|
72 |
combined_file_path = f"{stamp}_combined.wav"
|
73 |
-
sf.write(combined_file_path, combined_data, sr)
|
74 |
|
75 |
# Clean up temporary files
|
76 |
shutil.rmtree(source_dir)
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
import scipy.io.wavfile
|
|
|
4 |
import os
|
5 |
import datetime
|
|
|
6 |
import shutil
|
7 |
import soundfile as sf
|
8 |
import nltk
|
9 |
+
import numpy as np # Add numpy to handle audio data
|
10 |
+
|
11 |
nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
|
12 |
from nltk import sent_tokenize
|
13 |
|
|
|
34 |
|
35 |
for i, sentence in enumerate(sentences):
|
36 |
# Perform TTS inference for each sentence
|
37 |
+
print(f"Processing sentence {i+1}: {sentence}")
|
38 |
speech = synthesiser(sentence)
|
39 |
|
40 |
# Extract the audio data and sampling rate from the pipeline output
|
41 |
+
audio_data = np.array(speech["audio"]) # Ensure the data is a NumPy array
|
42 |
sample_rate = speech["sampling_rate"]
|
43 |
|
44 |
# Save each sentence as a separate audio file
|
45 |
wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
|
46 |
+
print(f"Saving audio to {wav_path}")
|
47 |
+
scipy.io.wavfile.write(wav_path, rate=sample_rate, data=audio_data.astype(np.int16)) # Ensure 16-bit format for WAV
|
48 |
audio_files.append(wav_path)
|
49 |
|
50 |
# Combine all audio files into one file
|
|
|
65 |
sr = None
|
66 |
for file in wav_files:
|
67 |
file_path = os.path.join(source_dir, file)
|
68 |
+
print(f"Combining {file_path}")
|
69 |
data, sample_rate = sf.read(file_path)
|
70 |
if sr is None:
|
71 |
sr = sample_rate # Set the sample rate based on the first file
|
|
|
73 |
|
74 |
# Save the combined audio to a new WAV file
|
75 |
combined_file_path = f"{stamp}_combined.wav"
|
76 |
+
sf.write(combined_file_path, np.array(combined_data), sr)
|
77 |
|
78 |
# Clean up temporary files
|
79 |
shutil.rmtree(source_dir)
|