File size: 1,499 Bytes
dfed10f
72c0928
a5d21d8
72c0928
3446d82
 
d8c6238
911f07d
 
d8c6238
a5d21d8
 
 
dfed10f
 
 
56c748c
 
 
dfed10f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3516019
dfed10f
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import streamlit as st
import importlib.util
import os
import subprocess
import sys

# Best-effort runtime installation of the packages this app was set up with.
# A package that is already importable is skipped, so re-running the script
# does not invoke pip again. pip is run through the current interpreter
# (`sys.executable -m pip`) so the install lands in the environment that is
# actually executing this script rather than whichever `pip` is first on PATH.
for _package in ("transformers", "torch", "tensorflow", "soundfile"):
  if importlib.util.find_spec(_package) is None:
    # check=False: a failed install is not fatal here, matching the previous
    # os.system() behaviour; a later import will fail if the package is needed.
    subprocess.run([sys.executable, "-m", "pip", "install", _package], check=False)

# Make packages installed a moment ago visible to the import system.
importlib.invalidate_caches()
import soundfile as sf

from transformers import VitsModel, AutoTokenizer
import numpy as np
import io

import torch
print(torch.__version__)


@st.cache_resource
def _load_tts_components(checkpoint="facebook/mms-tts-eng"):
  """Load the VITS model and its tokenizer for *checkpoint*.

  Streamlit re-executes the whole script on every widget interaction;
  caching the loaded objects with ``st.cache_resource`` means the
  checkpoint is loaded once per process instead of on every rerun.

  Returns:
    A ``(model, tokenizer)`` tuple.
  """
  return (
    VitsModel.from_pretrained(checkpoint),
    AutoTokenizer.from_pretrained(checkpoint),
  )


# Load model and tokenizer (cached across Streamlit reruns)
model, tokenizer = _load_tts_components()

def generate_speech(text: str) -> io.BytesIO:
  """Synthesize *text* to speech and return it as an in-memory WAV file.

  Uses the module-level ``tokenizer`` and ``model``.

  Args:
    text: The text to convert to speech.

  Returns:
    An ``io.BytesIO`` containing WAV data, rewound to position 0 so it can
    be read (or handed to ``st.audio``) immediately.
  """
  inputs = tokenizer(text, return_tensors="pt")

  # Inference only: no gradients are needed.
  with torch.no_grad():
    output = model(**inputs).waveform

  # Convert the waveform tensor to a NumPy array
  waveform = output.squeeze().cpu().numpy()

  # The WAV header must carry the rate the model generated the audio at;
  # a hard-coded value that differs from it plays back at the wrong speed
  # and pitch. The checkpoint's config is the source of truth.
  sample_rate = model.config.sampling_rate

  # Convert the waveform to bytes
  audio_bytes_io = io.BytesIO()
  sf.write(audio_bytes_io, waveform, samplerate=sample_rate, format='WAV')
  audio_bytes_io.seek(0)

  return audio_bytes_io

# --- Streamlit page: header, text box, and the button that runs synthesis ---
st.title("Text-to-Speech Converter")
st.write("Developed by Hiba Bayz")
st.write("Enter text below and click 'Generate Speech' to convert it to audio.")

# Text input
text_input = st.text_area("Text to convert:", "Some example text in the English language")

if st.button("Generate Speech"):
  # Treat whitespace-only input as empty so blank text is never sent to the
  # model; the text itself is passed through unmodified.
  if text_input and text_input.strip():
    st.write("Generating speech...")
    audio_bytes_io = generate_speech(text_input)

    # Display audio in Streamlit
    st.audio(audio_bytes_io, format="audio/wav")
  else:
    st.write("Please enter some text.")