Podcastking2

Running

App Files Community

gnosticdev committed on Jun 21

Commit

1bb4376

verified ·

1 Parent(s): 90d12be

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -16

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import gradio as gr
 import os
 import asyncio
 from conver import ConversationConfig, URLToAudioConverter
 from dotenv import load_dotenv
 from pydub import AudioSegment
@@ -24,7 +25,7 @@ def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str
     musica_fondo_loop = musica_fondo_loop[:duracion_podcast]
     mezcla = musica_fondo_loop.overlay(podcast_audio)
-    mezcla = mezcla + tag_outro  # tag.mp3 como outro
     silent_ranges = []
     for i in range(0, len(podcast_audio) - 500, 100):
@@ -39,16 +40,30 @@ def mezclar_musica_y_tags(audio_path: str, custom_music_path: str = None) -> str
     mezcla.export(output_path, format="mp3")
     return output_path
-def synthesize_sync(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt):
-    return asyncio.run(synthesize(article_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt))
-async def synthesize(article_url, text_input, language="en", skip_llm=False, agregar_musica=False, custom_music=None, custom_prompt=None):
     if not article_url and not text_input:
         return "Error: Ingresa una URL o texto", None
     try:
         config = ConversationConfig(custom_prompt_template=custom_prompt)
         converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
         voices = {
             "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
@@ -56,12 +71,7 @@ async def synthesize(article_url, text_input, language="en", skip_llm=False, agr
         }
         voice1, voice2 = voices.get(language, voices["en"])
-        if skip_llm and text_input:
-            output_file, conversation = await converter.raw_text_to_audio(text_input, voice1, voice2, custom_music)
-        elif text_input:
-            output_file, conversation = await converter.text_to_audio(text_input, voice1, voice2, custom_music)
-        else:
-            output_file, conversation = await converter.url_to_audio(article_url, voice1, voice2, custom_music)
         if agregar_musica:
             output_file = mezclar_musica_y_tags(output_file, custom_music)
@@ -70,6 +80,12 @@ async def synthesize(article_url, text_input, language="en", skip_llm=False, agr
     except Exception as e:
         return f"Error: {str(e)}", None
 with gr.Blocks(theme='gstaff/sketch') as demo:
     gr.Markdown("# 🎙 Podcast Converter")
     with gr.Group():
@@ -77,21 +93,30 @@ with gr.Blocks(theme='gstaff/sketch') as demo:
         text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
         language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
         skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
-        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
-        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
         custom_prompt = gr.Textbox(
             label="Prompt personalizado (opcional)",
             placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
         )
-        btn = gr.Button("Generar Podcast", variant="primary")
     with gr.Row():
         conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
         aud = gr.Audio(label="Audio Generado", interactive=False)
-    btn.click(
         synthesize_sync,
-        inputs=[text_url, text_input, language, skip_llm, agregar_musica, custom_music, custom_prompt],
         outputs=[conv_display, aud]
     )

 import gradio as gr
 import os
 import asyncio
+import json
 from conver import ConversationConfig, URLToAudioConverter
 from dotenv import load_dotenv
 from pydub import AudioSegment
     musica_fondo_loop = musica_fondo_loop[:duracion_podcast]
     mezcla = musica_fondo_loop.overlay(podcast_audio)
+    mezcla = mezcla + tag_outro
     silent_ranges = []
     for i in range(0, len(podcast_audio) - 500, 100):
     mezcla.export(output_path, format="mp3")
     return output_path
+async def generate_dialogue(article_url, text_input, language, skip_llm, custom_prompt):
     if not article_url and not text_input:
         return "Error: Ingresa una URL o texto", None
     try:
         config = ConversationConfig(custom_prompt_template=custom_prompt)
         converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
+        if skip_llm and text_input:
+            dialogue = {"conversation": [{"speaker": "Anfitrión1", "text": text_input}]}
+        elif text_input:
+            dialogue = converter.extract_conversation(text_input)
+        else:
+            dialogue = converter.extract_conversation(await converter.fetch_text(article_url))
+        return json.dumps(dialogue, indent=2, ensure_ascii=False), dialogue
+    except Exception as e:
+        return f"Error: {str(e)}", None
+async def generate_audio(dialogue_json, language, agregar_musica, custom_music):
+    try:
+        dialogue = json.loads(dialogue_json)
+        config = ConversationConfig()
+        converter = URLToAudioConverter(config, llm_api_key=os.environ.get("TOGETHER_API_KEY"))
         voices = {
             "en": ("en-US-AvaMultilingualNeural", "en-US-AndrewMultilingualNeural"),
         }
         voice1, voice2 = voices.get(language, voices["en"])
+        output_file, conversation = await converter._process_to_audio(dialogue, voice1, voice2, custom_music)
         if agregar_musica:
             output_file = mezclar_musica_y_tags(output_file, custom_music)
     except Exception as e:
         return f"Error: {str(e)}", None
+def synthesize_sync(article_url, text_input, language, skip_llm, custom_prompt):
+    return asyncio.run(generate_dialogue(article_url, text_input, language, skip_llm, custom_prompt))
+def generate_audio_sync(dialogue_json, language, agregar_musica, custom_music):
+    return asyncio.run(generate_audio(dialogue_json, language, agregar_musica, custom_music))
 with gr.Blocks(theme='gstaff/sketch') as demo:
     gr.Markdown("# 🎙 Podcast Converter")
     with gr.Group():
         text_input = gr.Textbox(label="Texto manual", lines=5, placeholder="Pega tu texto aquí...")
         language = gr.Dropdown(["en", "es"], label="Idioma", value="en")
         skip_llm = gr.Checkbox(label="🔴 Modo libre (sin filtros LLM)", value=False)
         custom_prompt = gr.Textbox(
             label="Prompt personalizado (opcional)",
             placeholder="{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. Usa un tono informal y genera al menos 6 intercambios por hablante. Devuelve SOLO un objeto JSON: {\"conversation\": [{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}, {\"speaker\": \"Anfitrión2\", \"text\": \"...\"}]}"
         )
+        btn_dialogue = gr.Button("Generar Diálogo", variant="primary")
+    with gr.Group():
+        dialogue_json = gr.Textbox(label="Diálogo JSON (editable)", lines=10, interactive=True)
+        agregar_musica = gr.Checkbox(label="🎵 Agregar música de fondo y cortinillas", value=False)
+        custom_music = gr.File(label="Subir música de fondo (opcional)", file_types=[".mp3"])
+        btn_audio = gr.Button("Generar Audio", variant="primary")
     with gr.Row():
         conv_display = gr.Textbox(label="Conversación", interactive=False, lines=10)
         aud = gr.Audio(label="Audio Generado", interactive=False)
+    btn_dialogue.click(
         synthesize_sync,
+        inputs=[text_url, text_input, language, skip_llm, custom_prompt],
+        outputs=[dialogue_json, dialogue_json]
+    )
+    btn_audio.click(
+        generate_audio_sync,
+        inputs=[dialogue_json, language, agregar_musica, custom_music],
         outputs=[conv_display, aud]
     )