import gradio as gr
import spaces
import os, torch, io
import json
import subprocess
import sys

# MeloTTS needs the unidic dictionary; fetch it with the interpreter that is
# running this app so it lands in the same environment. Best-effort: a failed
# download is not fatal here (check=False).
subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=False)

from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment


def _parse_turn_texts(conversation_text):
    """Return the list of turn texts from the conversation JSON.

    The expected shape is ``{"conversation": [{"text": "..."}, ...]}``.

    Raises:
        gr.Error: if the input is not valid JSON, does not have the expected
            shape, or contains no turns.
    """
    try:
        turns = json.loads(conversation_text)["conversation"]
        texts = [turn["text"] for turn in turns]
    except (ValueError, TypeError, KeyError) as err:
        # ValueError covers json.JSONDecodeError; TypeError/KeyError cover a
        # wrong top-level type or missing "conversation"/"text" keys.
        raise gr.Error(
            'Input must be JSON of the form '
            '{"conversation": [{"text": "..."}, ...]}.'
        ) from err
    if not texts:
        raise gr.Error('The conversation contains no turns.')
    return texts


@spaces.GPU
def synthesize(conversation_text, speed=1.0, progress=gr.Progress()):
    """Synthesize a two-voice conversation into a single MP3 file.

    Args:
        conversation_text: JSON string of the form
            ``{"conversation": [{"text": "..."}, ...]}``.
        speed: Speed value forwarded to ``TTS.tts_to_file``. Defaults to 1.0
            so the function can be wired to a UI that supplies only the text.
        progress: Gradio progress tracker; its ``tqdm`` wrapper is passed to
            the TTS call as the progress bar.

    Returns:
        Path to the generated MP3 file.

    Raises:
        gr.Error: if ``conversation_text`` is malformed or has no turns.
    """
    # Validate the input first so bad requests fail before the model loads.
    texts = _parse_turn_texts(conversation_text)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    models = {
        'EN': TTS(language='EN', device=device),
    }
    speakers = ['EN-US', 'EN-Default']

    # Loop-invariant lookups.
    model = models['EN']
    spk2id = model.hps.data.spk2id

    combined_audio = AudioSegment.empty()
    for i, text in enumerate(texts):
        # Alternate between the two voices on successive turns.
        speaker_id = spk2id[speakers[i % 2]]
        bio = io.BytesIO()
        model.tts_to_file(text, speaker_id, bio, speed=speed, pbar=progress.tqdm, format='wav')
        bio.seek(0)
        combined_audio += AudioSegment.from_file(bio, format="wav")

    # Use a unique file per request: the queue allows concurrent calls, and a
    # fixed filename would let them overwrite each other's output.
    with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as tmp:
        final_audio_path = tmp.name
    combined_audio.export(final_audio_path, format='mp3')
    return final_audio_path


with gr.Blocks() as demo:
    gr.Markdown('# Turn Any Article into Podcast')
    gr.Markdown('## Easily convert articles from URLs into listenable audio Podcast.')
    with gr.Group():
        text = gr.Textbox(label="Article Link")
    btn = gr.Button('Podcasitfy', variant='primary')
    aud = gr.Audio(interactive=False)
    # Only the text is supplied; synthesize() falls back to its default speed.
    btn.click(synthesize, inputs=[text], outputs=[aud])

demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)