10zinten committed on
Commit
5de81a9
·
verified ·
1 Parent(s): 883a948

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -9
app.py CHANGED
@@ -4,30 +4,45 @@ import scipy.io.wavfile
4
  import numpy as np
5
 
6
  # Load the MMS-TTS model and processor for Tibetan (bod)
7
- model_id = "ganga4364/mms-tts-bod-finetune-sherab" # Replace with your fine-tuned model if necessary
8
-
9
 
10
  # Use the text-to-speech pipeline with the model
11
  synthesiser = pipeline("text-to-speech", model_id) # add device=0 if you want to use a GPU
12
 
13
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Function to perform TTS inference and save audio to a file
15
  def generate_audio(input_text):
 
 
16
  # Perform TTS inference
17
  speech = synthesiser(input_text)
18
- file_path = "finetuned_output.wav"
19
- # Save the audio to a file (e.g., 'output.wav')
20
- scipy.io.wavfile.write(file_path, rate=speech["sampling_rate"], data=speech["audio"][0])
 
21
 
22
- # Return the path to the audio file
23
- return file_path
24
 
25
  # Create the Gradio interface
26
  iface = gr.Interface(
27
  fn=generate_audio,
28
  inputs="text", # Text input for the TTS
29
  outputs="audio", # Output will be an audio file
30
- title="Tibetan Text-to-Speech (MMS-TTS)",
31
  description="Enter Tibetan text and generate speech using MMS-TTS."
32
  )
33
 
 
4
  import numpy as np
5
 
6
  # Load the MMS-TTS model and processor for Tibetan (bod)
7
+ model_id = "openpecha/mms-tts-sherab"
 
8
 
9
  # Use the text-to-speech pipeline with the model
10
  synthesiser = pipeline("text-to-speech", model_id) # add device=0 if you want to use a GPU
11
 
12
def replace_numbers_with_convert(sentence, wylie=True):
    """Replace every number in ``sentence`` with the output of ``convert``.

    A number is a run of digits with an optional fractional part, such as
    ``12`` or ``3.14``. Each match is replaced by
    ``convert(matched_text, wylie)``; text outside the matches is untouched.

    Args:
        sentence: Text to rewrite.
        wylie: Forwarded unchanged as the second argument of ``convert``.

    Returns:
        ``sentence`` with each number substituted.
    """
    # Imported locally so the function works even when the module has no
    # top-level ``import re``.
    import re

    # NOTE(review): ``convert`` is not defined in the visible part of this
    # file -- confirm it is imported at module level.
    def substitute(match):
        return convert(match.group(), wylie)

    return re.sub(r'\d+(\.\d+)?', substitute, sentence)
20
+
21
def num2letter(sentence):
    """Map Tibetan digits to ASCII digits, then convert the numbers found.

    Args:
        sentence: Text that may contain Tibetan digits.

    Returns:
        The result of ``replace_numbers_with_convert`` called on the
        digit-normalised text with ``wylie=False``.
    """
    # One translation pass: each Tibetan digit becomes the ASCII digit with
    # the same position in the sequence.
    tibetan_to_ascii = str.maketrans("༠༡༢༣༤༥༦༧༨༩", "0123456789")
    normalised = sentence.translate(tibetan_to_ascii)
    return replace_numbers_with_convert(normalised, wylie=False)
27
+
28
  # Function to perform TTS inference and save audio to a file
29
  def generate_audio(input_text):
30
+ # preprocess
31
+ text = num2letter(text)
32
  # Perform TTS inference
33
  speech = synthesiser(input_text)
34
+ # postprocess
35
+ audio = noisereduce.reduce_noise(y=speech["audio"], sr=speech["sampling_rate"])
36
+
37
+ return audio, speech["sampling_rate"]
38
 
 
 
39
 
40
  # Create the Gradio interface
41
  iface = gr.Interface(
42
  fn=generate_audio,
43
  inputs="text", # Text input for the TTS
44
  outputs="audio", # Output will be an audio file
45
+ title="Tibetan Text-to-Speech (MMS-TTS) Sherab",
46
  description="Enter Tibetan text and generate speech using MMS-TTS."
47
  )
48