ganga4364 committed on
Commit
fdb2ada
·
verified ·
1 Parent(s): 90e01e6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import tempfile
from io import BytesIO

import gradio as gr
import scipy.io.wavfile
import torch
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    VitsModel,
)
6
+
7
# Load the tokenizer and model for TTS.
# MMS-TTS checkpoints are VITS models: transformers serves them through
# VitsModel plus a plain tokenizer, and VITS is not registered under the
# speech-seq2seq auto class, so the explicit class is used here.
# NOTE(review): assumes this fine-tune keeps the upstream MMS-TTS (VITS)
# config -- confirm against the checkpoint's config.json.
model_id = "ganga4364/mms-tts-bod-finetune-sherab"
processor = AutoTokenizer.from_pretrained(model_id)
model = VitsModel.from_pretrained(model_id)


def generate_audio(input_text: str) -> str:
    """Synthesize speech for *input_text* and return the path of a WAV file.

    Args:
        input_text: Text to speak, as typed into the Gradio text box.

    Returns:
        Path to a newly created ``.wav`` file holding the generated waveform,
        written at the sampling rate declared by the model config.

    Raises:
        gr.Error: If *input_text* is empty or only whitespace.
    """
    # Reject blank input up front so the user sees a clear message instead of
    # a failure from inside the tokenizer or the model.
    if not input_text or not input_text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Tokenize the text input.
    inputs = processor(text=input_text, return_tensors="pt")

    # VITS synthesizes in a single forward pass; the audio is exposed as
    # `.waveform` with a leading batch dimension.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # Take the single batch item and hand scipy a float NumPy array.
    audio_waveform = waveform[0].cpu().float().numpy()

    # Write to a unique temporary file per call: a fixed "output.wav" would
    # let concurrent requests overwrite each other's audio. The file is kept
    # (delete=False) because Gradio reads it after this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        file_path = tmp.name

    # Use the model's own sampling rate rather than assuming 16 kHz.
    scipy.io.wavfile.write(
        file_path,
        rate=model.config.sampling_rate,
        data=audio_waveform,
    )

    # Return the file path so Gradio can serve the audio file.
    return file_path


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_audio,
    inputs="text",
    outputs="audio",  # Output should be the audio file
    title="Text-to-Speech Tibetan Model",
    description="Enter text to generate speech with a fine-tuned Tibetan voice and return the audio.",
)

# Launch the Gradio app only when executed as a script, so importing this
# module (e.g. for tooling) does not start a server.
if __name__ == "__main__":
    iface.launch()