Spaces:

manufy
/

tts-gradio

Sleeping

App Files Community

Manu committed on May 18, 2024

Commit

0e826bb

1 Parent(s): e8b9495

revert to wav file recording

Browse files

Files changed (1) hide show

app.py +24 -7

app.py CHANGED Viewed

@@ -41,6 +41,11 @@ def synthesise_audio(text, forward_params=None):
         raise ValueError("Error: El texto es demasiado largo. Por favor, limita tu entrada a 100 caracteres.")
     speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
     # sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
     # return "speech.wav"
@@ -59,21 +64,33 @@ def synthesise_audio(text, forward_params=None):
     #return speech["audio"]
     #return audio
     # Create an in-memory buffer to store the audio data
-    audio_buffer = io.BytesIO()
     # Write the audio data to the in-memory buffer
-    sf.write(audio_buffer, speech["audio"], samplerate=speech["sampling_rate"], format="WAV")
     # Move the buffer cursor to the beginning of the buffer
-    audio_buffer.seek(0)
     # Read the audio data from the in-memory buffer into a numpy array
-    audio, sr = sf.read(audio_buffer)
-    return audio, sr
@@ -85,8 +102,8 @@ input_text = gr.Textbox(lines=10, label="Enter text here")
 demo = gr.Interface(fn=synthesise_audio,
                     inputs=input_text,
-                    #outputs="audio",
-                    outputs = gr.Audio(type="numpy"),
                     description="----- manuai Text To Speech generator -----",
                     allow_flagging = False)

         raise ValueError("Error: El texto es demasiado largo. Por favor, limita tu entrada a 100 caracteres.")
     speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
+    sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
+    return "speech.wav"
     # sf.write("speech.wav", speech["audio"], samplerate=speech["sampling_rate"])
     # return "speech.wav"
     #return speech["audio"]
     #return audio
+    # Ensure audio is a numpy array
+    #if isinstance(speech["audio"], int):
+    #    audio = np.array([speech["audio"]])
+    #else:
+    #    audio = speech["audio"]
     # Create an in-memory buffer to store the audio data
+    #print("Creating in-memory buffer")
+    #audio_buffer = io.BytesIO()
     # Write the audio data to the in-memory buffer
+    #print("Writing audio data to in-memory buffer")
+    #sf.write(audio_buffer, speech["audio"], samplerate=speech["sampling_rate"], format="WAV")
     # Move the buffer cursor to the beginning of the buffer
+    #audio_buffer.seek(0)
     # Read the audio data from the in-memory buffer into a numpy array
+    #print("Reading audio data from in-memory buffer")
+    #audio, sr = sf.read(audio_buffer)
+    #print("Audio data read from in-memory buffer, returning audio data and sample rate")
+    # Ensure audio is a numpy array before returning
+    #audio = np.array(audio)
+    #return audio, sr
 demo = gr.Interface(fn=synthesise_audio,
                     inputs=input_text,
+                    outputs="audio",
+                    #outputs = gr.Audio(type="numpy"),
                     description="----- manuai Text To Speech generator -----",
                     allow_flagging = False)