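"""Gradio app that turns a URL or pasted text into a two-host podcast:
an LLM builds the dialogue, neural TTS voices read it, and pydub optionally
mixes in a background-music bed and station tags."""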
import gradio as gr
import os
import asyncio
import json
from conver import ConversationConfig, URLToAudioConverter
from dotenv import load_dotenv
from pydub import AudioSegment
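
# Read environment variables (e.g. TOGETHER_API_KEY) from a local .env file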
load_dotenv()
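
# Bundled audio assets: background-music bed and station tags (outro / transition)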
MUSICA_FONDO = "musica.mp3"
TAG1 = "tag.mp3"
TAG2 = "tag2.mp3"


def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str:
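    """Mix the voice track with a looped background-music bed and station tags.

    Overlays a -15 dB music loop under the whole episode, appends the outro tag,
    and drops the transition tag into detected silences. Returns the path of the
    new MP3 ("<name>_con_musica.mp3").
    """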
    podcast_audio = AudioSegment.from_file(audio_path)
    # Prefer the user-uploaded music if it exists; otherwise fall back to the bundled track
    music_file = custom_music_path if custom_music_path and os.path.exists(custom_music_path) else MUSICA_FONDO
    musica_fondo = AudioSegment.from_file(music_file).apply_gain(-15)
    tag_outro = AudioSegment.from_file(TAG1).apply_gain(-5)
    tag_trans = AudioSegment.from_file(TAG2).apply_gain(-5)
    # Loop the music bed until it covers the whole podcast, then trim to exact length
    duracion_podcast = len(podcast_audio)
    repeticiones = (duracion_podcast // len(musica_fondo)) + 1
    musica_fondo_loop = (musica_fondo * repeticiones)[:duracion_podcast]
    mezcla = musica_fondo_loop.overlay(podcast_audio)
    mezcla = mezcla + tag_outro
    # Scan the voice track in 100 ms steps for 500 ms windows quieter than -40 dBFS
    silent_ranges = []
    for i in range(0, len(podcast_audio) - 500, 100):
        chunk = podcast_audio[i:i + 500]
        if chunk.dBFS < -40:
            silent_ranges.append((i, i + 500))
    # Overlay the transition tag in each silent window, but only when the tag
    # is short enough to fit inside the 500 ms window
    for start, end in reversed(silent_ranges):
        if (end - start) >= len(tag_trans):
            mezcla = mezcla.overlay(tag_trans, position=start + 50)
    output_path = audio_path.replace(".mp3", "_con_musica.mp3")
    mezcla.export(output_path, format="mp3")
    return output_path


async def generate_dialogue(article_url, text_input, language, skip_llm, custom_prompt):
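    """Produce the dialogue JSON shown in the editable textbox.

    Free mode wraps the input text as a single-speaker conversation; otherwise
    the text (or the fetched URL contents) goes through the LLM converter.
    Returns (json_string, parsed_dialogue), or (error_message, None).
    """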
    if not article_url and not text_input:
        return "Error: Ingresa una URL o texto", None
    try:
        config = ConversationConfig(custom_prompt_template=custom_prompt)
        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
        if skip_llm and text_input:
            # Free mode: wrap the raw text as a single-speaker conversation, bypassing the LLM
            dialogue = {"conversation": [{"speaker": "Anfitrión1", "text": text_input}]}
        elif text_input:
            dialogue = converter.extract_conversation(text_input)
        else:
            dialogue = converter.extract_conversation(await converter.fetch_text(article_url))
        # Use double quotes when displaying the JSON so it round-trips through json.loads
        return json.dumps(dialogue, indent=2, ensure_ascii=False), dialogue
    except Exception as e:
        return f"Error: {str(e)}", None


async def generate_audio(dialogue_json, language, agregar_musica, custom_music):
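    """Synthesize the podcast audio from the (possibly hand-edited) dialogue JSON.

    Expects {"conversation": [{"speaker": ..., "text": ...}, ...]}; optionally
    mixes in background music and tags. Returns (conversation, audio_path),
    or (error_message, None).
    """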
    try:
        # Validate the JSON before parsing it further
        json_str = dialogue_json.strip()
        try:
            dialogue = json.loads(json_str)
        except json.JSONDecodeError as e:
            return f"Error: JSON inválido - {str(e)}", None
        if not dialogue.get("conversation"):
            return "Error: El JSON no contiene 'conversation'", None
        config = ConversationConfig()
        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
        # Neural TTS voice pair (host 1, host 2) per UI language
        voices = {
            "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
            "es": ("es-ES-AlvaroNeural", "es-ES-ElviraNeural"),
        }
        voice1, voice2 = voices.get(language, voices["en"])
        output_file, conversation = await converter._process_to_audio(dialogue, voice1, voice2, custom_music)
        if agregar_musica:
            output_file = mezclar_musica_y_tags(output_file, custom_music)
        return conversation, output_file
    except Exception as e:
        return f"Error: {str(e)}", None


def synthesize_sync(article_url, text_input, language, skip_llm, custom_prompt):
    # Sync wrapper for the Gradio click handler. Only the JSON string is
    # surfaced; the parsed dict is dropped so the handler writes a single
    # value to a single output component (see btn_dialogue.click below).
    dialogue_str, _ = asyncio.run(generate_dialogue(article_url, text_input, language, skip_llm, custom_prompt))
    return dialogue_str


def generate_audio_sync(dialogue_json, language, agregar_musica, custom_music):
    return asyncio.run(generate_audio(dialogue_json, language, agregar_musica, custom_music))


with gr.Blocks(theme='gstaff/sketch') as demo:
    gr.Markdown("# 🎙 Podcast Converter")
    with gr.Group():
        text_url = gr.Textbox(label="URL (opcional)", placeholder="https://...")
        text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
        language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
        skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
        custom_prompt = gr.Textbox(
            label="Prompt personalizado (opcional)",
            placeholder='{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {"conversation": [{"speaker": "Anfitrión1", "text": "..."}, {"speaker": "Anfitrión2", "text": "..."}]}'
        )
        btn_dialogue = gr.Button("Generar Diálogo", variant="primary")
    with gr.Group():
        dialogue_json = gr.Textbox(label="Diálogo JSON (editable)", lines=10, interactive=True)
        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
        btn_audio = gr.Button("Generar Audio", variant="primary")
    with gr.Row():
        conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
        aud = gr.Audio(label="Audio Generado", interactive=False)
    btn_dialogue.click(
        synthesize_sync,
        inputs=[text_url, text_input, language, skip_llm, custom_prompt],
        outputs=[dialogue_json]
    )
    btn_audio.click(
        generate_audio_sync,
        inputs=[dialogue_json, language, agregar_musica, custom_music],
        outputs=[conv_display, aud]
    )

demo.launch()