# Spaces: Sleeping (page-status text captured along with this source; not part of the program)
import tempfile
from pathlib import Path

import streamlit as st
import torch
import torchaudio
from transformers import pipeline
# Model repository id and pipeline task name handed to transformers.pipeline().
model_id = '11mlabs/indri-0.1-124m-tts'
task = 'indri-tts'

# Sample rate passed to torchaudio.save() for the generated waveform.
SAMPLE_RATE = 24000

# Emoji are written as escapes so the speaker labels survive any re-encoding
# of this file (they had previously been corrupted into Thai-looking mojibake).
_FLAG_GB = "\U0001F1EC\U0001F1E7"
_FLAG_US = "\U0001F1FA\U0001F1F8"
_FLAG_IN = "\U0001F1EE\U0001F1F3"
_MAN = "\U0001F468"
_WOMAN = "\U0001F469"


@st.cache_resource
def load_pipeline():
    """Build the TTS pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the model would be loaded again on each rerun.

    Returns:
        The object returned by ``transformers.pipeline`` for ``task`` and
        ``model_id``.
    """
    return pipeline(
        task,
        model=model_id,
        # device=torch.device('cuda:0'),  # Update this based on your hardware
        # NOTE: trust_remote_code=True lets transformers execute code shipped
        # in the model repository; only use it with repositories you trust.
        trust_remote_code=True,
    )


pipe = load_pipeline()

st.title("Indri")
st.subheader("Ultrafast multi-modal AI")

# Maps the speaker token passed to the pipeline to the label shown in the UI.
speakers = {
    "[spkr_63]": f"{_FLAG_GB} {_MAN} book reader",
    "[spkr_67]": f"{_FLAG_US} {_MAN} influencer",
    "[spkr_68]": f"{_FLAG_IN} {_MAN} book reader",
    "[spkr_69]": f"{_FLAG_IN} {_MAN} book reader",
    "[spkr_70]": f"{_FLAG_IN} {_MAN} motivational speaker",
    "[spkr_62]": f"{_FLAG_IN} {_MAN} book reader heavy",
    "[spkr_53]": f"{_FLAG_IN} {_WOMAN} recipe reciter",
    "[spkr_60]": f"{_FLAG_IN} {_WOMAN} book reader",
    "[spkr_74]": f"{_FLAG_US} {_MAN} book reader",
    "[spkr_75]": f"{_FLAG_IN} {_MAN} entrepreneur",
    "[spkr_76]": f"{_FLAG_GB} {_MAN} nature lover",
    "[spkr_77]": f"{_FLAG_IN} {_MAN} influencer",
    "[spkr_66]": f"{_FLAG_IN} {_MAN} politician",
}

# Create a container for the speaker selection and text input
with st.container():
    st.markdown("### Speaker Selection")
    speaker_id = st.selectbox(
        "Select a speaker:",
        options=list(speakers),
        format_func=lambda key: speakers[key],
    )

    st.markdown("### Text Input")
    text_input = st.text_area(
        "Enter text for TTS (max 200 characters):",
        max_chars=200,
    )

    if st.button("Generate Audio", key="generate_audio"):
        # Treat whitespace-only input the same as empty input.
        if text_input.strip():
            output = pipe([text_input], speaker=speaker_id)
            # Handed straight to torchaudio.save(); presumably a
            # (channels, samples) tensor -- confirm against the pipeline.
            waveform = output[0]['audio'][0]

            # Write to a per-request temporary directory rather than a fixed
            # 'output.wav' in the working directory, so concurrent sessions
            # cannot overwrite or play back each other's audio.
            with tempfile.TemporaryDirectory() as tmp_dir:
                wav_path = Path(tmp_dir) / "output.wav"
                torchaudio.save(str(wav_path), waveform, sample_rate=SAMPLE_RATE)
                wav_bytes = wav_path.read_bytes()

            st.audio(wav_bytes, format="audio/wav")
        else:
            st.warning("Please enter text to generate audio.")