ganga4364 committed on
Commit
72ea965
·
verified ·
1 Parent(s): fba041b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -5
app.py CHANGED
@@ -1,13 +1,13 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import scipy.io.wavfile
4
- from io import BytesIO
5
  import os
6
  import datetime
7
- import uuid
8
  import shutil
9
  import soundfile as sf
10
  import nltk
 
 
11
  nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
12
  from nltk import sent_tokenize
13
 
@@ -34,15 +34,17 @@ def generate_audio(input_text):
34
 
35
  for i, sentence in enumerate(sentences):
36
  # Perform TTS inference for each sentence
 
37
  speech = synthesiser(sentence)
38
 
39
  # Extract the audio data and sampling rate from the pipeline output
40
- audio_data = speech["audio"][0]
41
  sample_rate = speech["sampling_rate"]
42
 
43
  # Save each sentence as a separate audio file
44
  wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
45
- scipy.io.wavfile.write(wav_path, rate=sample_rate, data=audio_data)
 
46
  audio_files.append(wav_path)
47
 
48
  # Combine all audio files into one file
@@ -63,6 +65,7 @@ def combine_wav(source_dir, stamp):
63
  sr = None
64
  for file in wav_files:
65
  file_path = os.path.join(source_dir, file)
 
66
  data, sample_rate = sf.read(file_path)
67
  if sr is None:
68
  sr = sample_rate # Set the sample rate based on the first file
@@ -70,7 +73,7 @@ def combine_wav(source_dir, stamp):
70
 
71
  # Save the combined audio to a new WAV file
72
  combined_file_path = f"{stamp}_combined.wav"
73
- sf.write(combined_file_path, combined_data, sr)
74
 
75
  # Clean up temporary files
76
  shutil.rmtree(source_dir)
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import scipy.io.wavfile
 
4
  import os
5
  import datetime
 
6
  import shutil
7
  import soundfile as sf
8
  import nltk
9
+ import numpy as np # Add numpy to handle audio data
10
+
11
  nltk.download('punkt') # Ensure that 'punkt' tokenizer is downloaded
12
  from nltk import sent_tokenize
13
 
 
34
 
35
  for i, sentence in enumerate(sentences):
36
  # Perform TTS inference for each sentence
37
+ print(f"Processing sentence {i+1}: {sentence}")
38
  speech = synthesiser(sentence)
39
 
40
  # Extract the audio data and sampling rate from the pipeline output
41
+ audio_data = np.array(speech["audio"]) # Ensure the data is a NumPy array
42
  sample_rate = speech["sampling_rate"]
43
 
44
  # Save each sentence as a separate audio file
45
  wav_path = f"{user_dir}/s_{str(i).zfill(10)}.wav"
46
+ print(f"Saving audio to {wav_path}")
47
+ scipy.io.wavfile.write(wav_path, rate=sample_rate, data=audio_data.astype(np.int16)) # Ensure 16-bit format for WAV
48
  audio_files.append(wav_path)
49
 
50
  # Combine all audio files into one file
 
65
  sr = None
66
  for file in wav_files:
67
  file_path = os.path.join(source_dir, file)
68
+ print(f"Combining {file_path}")
69
  data, sample_rate = sf.read(file_path)
70
  if sr is None:
71
  sr = sample_rate # Set the sample rate based on the first file
 
73
 
74
  # Save the combined audio to a new WAV file
75
  combined_file_path = f"{stamp}_combined.wav"
76
+ sf.write(combined_file_path, np.array(combined_data), sr)
77
 
78
  # Clean up temporary files
79
  shutil.rmtree(source_dir)