Spaces:

11mlabs
/

IndriVoice

Sleeping

App Files Files Community

IndriVoice / app.py

skriller18

Headings

1a0027d 4 months ago

raw

history blame

1.68 kB

	import tempfile
	from pathlib import Path

	import streamlit as st
	import torch
	import torchaudio
	from transformers import pipeline

	model_id = '11mlabs/indri-0.1-124m-tts'
	task = 'indri-tts'

	# Render the page header before loading the model so the page is not blank
	# while the pipeline is being constructed.
	st.title("Indri")
	st.subheader("Ultrafast multi-modal AI")


	@st.cache_resource(show_spinner="Loading Indri model...")
	def _load_pipeline():
	    """Build the TTS pipeline once and reuse it across script reruns.

	    Streamlit re-executes this script on every widget interaction; without
	    caching, each rerun would construct the pipeline again.

	    NOTE(review): the cached object is shared by every session -- confirm
	    the pipeline tolerates concurrent calls.

	    Returns:
	        The object returned by ``transformers.pipeline`` for ``task`` and
	        ``model_id``.
	    """
	    return pipeline(
	        task,
	        model=model_id,
	        # To run on a GPU, add e.g. device=torch.device('cuda:0') here.
	        # SECURITY: trust_remote_code=True executes Python code shipped in
	        # the model repository; only use it with a repository you trust.
	        trust_remote_code=True,
	    )


	pipe = _load_pipeline()


	# Maps each speaker token (passed to the pipeline as ``speaker``) to the
	# label displayed for it in the speaker dropdown. Insertion order is the
	# order in which the options are offered.
	# NOTE(review): "[spkr_68]" and "[spkr_69]" carry the same label, so the two
	# entries look identical in the dropdown.
	speakers = {
	    "[spkr_63]": "🇬🇧 👨 book reader",
	    "[spkr_67]": "🇺🇸 👨 influencer",
	    "[spkr_68]": "🇮🇳 👨 book reader",
	    "[spkr_69]": "🇮🇳 👨 book reader",
	    "[spkr_70]": "🇮🇳 👨 motivational speaker",
	    "[spkr_62]": "🇮🇳 👨 book reader heavy",
	    "[spkr_53]": "🇮🇳 👩 recipe reciter",
	    "[spkr_60]": "🇮🇳 👩 book reader",
	    "[spkr_74]": "🇺🇸 👨 book reader",
	    "[spkr_75]": "🇮🇳 👨 entrepreneur",
	    "[spkr_76]": "🇬🇧 👨 nature lover",
	    "[spkr_77]": "🇮🇳 👨 influencer",
	    "[spkr_66]": "🇮🇳 👨 politician",
	}

	# Container holding the speaker selection and the text input.
	with st.container():
	    st.markdown("### Speaker Selection")
	    speaker_id = st.selectbox(
	        "Select a speaker:",
	        options=list(speakers),
	        format_func=speakers.get,  # show the label, return the token
	    )

	    st.markdown("### Text Input")
	    text_input = st.text_area(
	        "Enter text for TTS (max 200 characters):",
	        max_chars=200,
	    )

	# Synthesize and play the audio when the button is pressed.
	if st.button("Generate Audio", key="generate_audio"):
	    # Whitespace-only input is treated the same as empty input.
	    if text_input.strip():
	        output = pipe([text_input], speaker=speaker_id)
	        # Write to a per-run temporary directory instead of a fixed
	        # 'output.wav' in the working directory, so concurrent sessions
	        # cannot overwrite each other's file before it is played back.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            wav_path = Path(tmp_dir) / 'output.wav'
	            torchaudio.save(
	                str(wav_path), output[0]['audio'][0], sample_rate=24000
	            )
	            wav_bytes = wav_path.read_bytes()
	        st.audio(wav_bytes, format="audio/wav")
	    else:
	        st.warning("Please enter text to generate audio.")