leenag committed on
Commit
5a1a7ec
·
verified ·
1 Parent(s): c953361

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -23
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import gradio as gr
2
  import torch
 
3
  from transformers import VitsModel, AutoTokenizer
4
- import soundfile as sf
5
- import tempfile
6
- import os
7
 
8
  LANG_MODEL_MAP = {
9
  "English": "facebook/mms-tts-eng",
@@ -25,35 +23,26 @@ def load_model_and_tokenizer(language):
25
  return cache[model_name]
26
 
27
  def tts(language, text):
28
- try:
29
- if not text.strip():
30
- return "Please enter some text.", None
31
 
32
- tokenizer, model = load_model_and_tokenizer(language)
33
- inputs = tokenizer(text, return_tensors="pt").to(device)
34
 
35
- with torch.no_grad():
36
- output = model(**inputs)
37
 
38
- # Save to temporary WAV file
39
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
40
- sf.write(f.name, output.waveform.cpu().numpy(), samplerate=16000)
41
- return "Here is your audio output", f.name
42
-
43
- except Exception as e:
44
- return f"Error: {str(e)}", None
45
 
46
  iface = gr.Interface(
47
  fn=tts,
48
  inputs=[
49
- gr.Dropdown(label="Select Language", choices=list(LANG_MODEL_MAP.keys()), value="English"),
50
  gr.Textbox(label="Enter Text")
51
  ],
52
- outputs=[
53
- gr.Textbox(label="Status"),
54
- gr.Audio(label="Synthesized Speech", type="filepath")
55
- ],
56
- title="Multilingual TTS with Meta MMS",
57
  description="Generate speech from text using Meta's MMS models for English, Hindi, Tamil, Malayalam, and Kannada."
58
  )
59
 
 
1
  import gradio as gr
2
  import torch
3
+ import numpy as np
4
  from transformers import VitsModel, AutoTokenizer
 
 
 
5
 
6
  LANG_MODEL_MAP = {
7
  "English": "facebook/mms-tts-eng",
 
23
  return cache[model_name]
24
 
25
  def tts(language, text):
26
+ if not text.strip():
27
+ return 16000, np.zeros(1) # empty waveform if no text
 
28
 
29
+ tokenizer, model = load_model_and_tokenizer(language)
30
+ inputs = tokenizer(text, return_tensors="pt").to(device)
31
 
32
+ with torch.no_grad():
33
+ output = model(**inputs)
34
 
35
+ waveform = output.waveform.squeeze().cpu().numpy()
36
+ return 16000, waveform
 
 
 
 
 
37
 
38
  iface = gr.Interface(
39
  fn=tts,
40
  inputs=[
41
+ gr.Dropdown(choices=list(LANG_MODEL_MAP.keys()), label="Select Language"),
42
  gr.Textbox(label="Enter Text")
43
  ],
44
+ outputs=gr.Audio(label="Synthesized Speech", type="numpy"),
45
+ title="Multilingual Text-to-Speech (MMS)",
 
 
 
46
  description="Generate speech from text using Meta's MMS models for English, Hindi, Tamil, Malayalam, and Kannada."
47
  )
48