DLI-SLQ committed on
Commit 4e63be7 · 1 Parent(s): 21f3450

Create app.py

Files changed (1)
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
+ import gradio as gr
+ import wave
+ import numpy as np
+ from io import BytesIO
+ from huggingface_hub import hf_hub_download
+ from piper import PiperVoice
+ from transformers import pipeline
+
+ # Load the NSFW text classifier
+ nsfw_detector = pipeline("text-classification", model="michellejieli/NSFW_text_classifier")
+
+ def synthesize_speech(text):
+     # Check for NSFW content; if detected, return the message through the text output
+     nsfw_result = nsfw_detector(text)
+     if nsfw_result[0]['label'] == 'NSFW':
+         return None, "NSFW content detected. Cannot process."
+
+     model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
+     config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
+     voice = PiperVoice.load(model_path, config_path)
+
+     # Create an in-memory buffer for the WAV file
+     buffer = BytesIO()
+     with wave.open(buffer, 'wb') as wav_file:
+         wav_file.setframerate(voice.config.sample_rate)
+         wav_file.setsampwidth(2)  # 16-bit
+         wav_file.setnchannels(1)  # mono
+
+         # Synthesize speech directly into the WAV buffer
+         voice.synthesize(text, wav_file)
+
+     # Skip the 44-byte WAV header and read the 16-bit PCM samples for Gradio
+     buffer.seek(44)
+     audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
+
+     return (voice.config.sample_rate, audio_data), None
+
+ # Build the UI with Gradio Blocks
+ with gr.Blocks(theme=gr.themes.Base()) as blocks:
+     gr.Markdown("# Text to Speech Synthesizer")
+     gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
+     input_text = gr.Textbox(label="Input Text")
+     output_audio = gr.Audio(label="Synthesized Speech", type="numpy")
+     output_text = gr.Textbox(label="Output Text", visible=False)  # Hidden textbox that receives the status message
+     submit_button = gr.Button("Synthesize")
+
+     submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
+
+ # Run the app
+ blocks.launch()
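
Note on the return format (a reading of Gradio's documented behavior, not something stated in the commit): with gr.Audio(type="numpy") as an output component, the callback is expected to return a (sample_rate, samples) tuple, which is why synthesize_speech returns the sample rate from the Piper voice config alongside the int16 array. A minimal standalone sketch of that shape, independent of the app:

import numpy as np

# Hypothetical illustration only: one second of silence in the (sample_rate, samples)
# form that gr.Audio(type="numpy") accepts. 22050 Hz is a common Piper sample rate,
# not a value read from the speaker_00 config.
sample_rate = 22050
samples = np.zeros(sample_rate, dtype=np.int16)
gradio_audio_value = (sample_rate, samples)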