Spaces:

DLI-SLQ
/

piper-tts

Runtime error

App Files Files Community

DLI-SLQ committed on Dec 20, 2023

Commit

4e63be7

1 Parent(s): 21f3450

Create app.py

Browse files

Files changed (1) hide show

app.py +50 -0

app.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import gradio as gr
+import wave
+import numpy as np
+from io import BytesIO
+from huggingface_hub import hf_hub_download
+from piper import PiperVoice
+from transformers import pipeline
+# Load the NSFW classifier model
+nsfw_detector = pipeline("text-classification", model="michellejieli/NSFW_text_classifier")
+def synthesize_speech(text):
+    # Check for NSFW content
+    nsfw_result = nsfw_detector(text)
+    if nsfw_result[0]['label'] == 'NSFW':
+        return "NSFW content detected. Cannot process.", None
+    model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
+    config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
+    voice = PiperVoice.load(model_path, config_path)
+    # Create an in-memory buffer for the WAV file
+    buffer = BytesIO()
+    with wave.open(buffer, 'wb') as wav_file:
+        wav_file.setframerate(voice.config.sample_rate)
+        wav_file.setsampwidth(2)  # 16-bit
+        wav_file.setnchannels(1)  # mono
+        # Synthesize speech
+        voice.synthesize(text, wav_file)
+    # Convert buffer to NumPy array for Gradio output
+    buffer.seek(0)
+    audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
+    return audio_data.tobytes(), None
+# Using Gradio Blocks
+with gr.Blocks(theme=gr.themes.Base()) as blocks:
+    gr.Markdown("# Text to Speech Synthesizer")
+    gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
+    input_text = gr.Textbox(label="Input Text")
+    output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
+    output_text = gr.Textbox(label="Output Text", visible=False)  # This is the new text output component
+    submit_button = gr.Button("Synthesize")
+    submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
+# Run the app
+blocks.launch()