Podcastking2

Running

App Files Files Community

gnosticdev committed 22 days ago

Commit

9e11cdd

verified ·

1 Parent(s): a2a2610

Update conver.py

Browse files

Files changed (1) hide show

conver.py +51 -26

conver.py CHANGED Viewed

@@ -17,6 +17,7 @@ class ConversationConfig:
     max_words: int = 3000
     prefix_url: str = "https://r.jina.ai/"
     model_name: str = "meta-llama/Llama-3-8b-chat-hf"
 class URLToAudioConverter:
     def __init__(self, config: ConversationConfig, llm_api_key: str):
@@ -56,21 +57,20 @@ class URLToAudioConverter:
         if not text:
             raise ValueError("Input text cannot be empty")
         try:
-            prompt = (
-                f"{text}\nConvierte el texto proporcionado en un diálogo de podcast en español "
-                f"entre Anfitrión1 y Anfitrión2. Genera una conversación extensa, detallada y natural, "
-                f"como en un podcast real, con al menos 5 intercambios por hablante. "
-                f"Devuelve SOLO un objeto JSON con la siguiente estructura:\n"
-                '{"conversation": [{"speaker": "Anfitrión1", "text": "..."}, {"speaker": "Anfitrión2", "text": "..."}]}'
             )
-            print(f"Texto de entrada: {text[:200]}...")  # Depuración
             response = self.llm_client.chat.completions.create(
                 messages=[{"role": "user", "content": prompt}],
                 model=self.config.model_name,
                 response_format={"type": "json_object"}
             )
             response_content = response.choices[0].message.content
-            print(f"Respuesta cruda del modelo: {response_content[:500]}...")  # Depuración
             json_str = response_content.strip()
             if not json_str.startswith('{'):
                 json_str = json_str[json_str.find('{'):]
@@ -132,66 +132,91 @@ class URLToAudioConverter:
         self,
         speech_audio: AudioSegment,
         music_path: str,
-        tags_paths: List[str]
     ) -> AudioSegment:
-        music = AudioSegment.from_file(music_path).fade_out(2000) - 25
         if len(music) < len(speech_audio):
             music = music * ((len(speech_audio) // len(music)) + 1)
         music = music[:len(speech_audio)]
         mixed = speech_audio.overlay(music)
-        tag_intro = AudioSegment.from_file(tags_paths[0]) - 10
         tag_trans = AudioSegment.from_file(tags_paths[1]) - 10
-        final_audio = tag_intro + mixed
         silent_ranges = []
         for i in range(0, len(speech_audio) - 500, 100):
             chunk = speech_audio[i:i+500]
             if chunk.dBFS < -40:
                 silent_ranges.append((i, i + 500))
         for start, end in reversed(silent_ranges):
             if (end - start) >= len(tag_trans):
                 final_audio = final_audio.overlay(tag_trans, position=start + 50)
         return final_audio
-    async def url_to_audio(self, url: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
         text = self.fetch_text(url)
         if len(words := text.split()) > self.config.max_words:
             text = " ".join(words[:self.config.max_words])
         conversation = self.extract_conversation(text)
-        return await self._process_to_audio(conversation, voice_1, voice_2)
-    async def text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
         conversation = self.extract_conversation(text)
-        return await self._process_to_audio(conversation, voice_1, voice_2)
-    async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
         conversation = {"conversation": [{"speaker": "Anfitrión1", "text": text}]}
-        return await self._process_to_audio(conversation, voice_1, voice_2)
     async def _process_to_audio(
         self,
         conversation: Dict,
         voice_1: str,
-        voice_2: str
     ) -> Tuple[str, str]:
         audio_files, folder_name = await self.text_to_speech(conversation, voice_1, voice_2)
         combined = self.combine_audio_files(audio_files)
         final_audio = self.add_background_music_and_tags(
             combined,
             "musica.mp3",
-            ["tag.mp3", "tag2.mp3"]
         )
         output_path = os.path.join(folder_name, "podcast_final.mp3")
         final_audio.export(output_path, format="mp3")
         for f in audio_files:
             os.remove(f)
         text_output = "\n".join(
             f"{turn['speaker']}: {turn['text']}"
             for turn in conversation["conversation"]
         )
         return output_path, text_output

     max_words: int = 3000
     prefix_url: str = "https://r.jina.ai/"
     model_name: str = "meta-llama/Llama-3-8b-chat-hf"
+    custom_prompt_template: str = None
 class URLToAudioConverter:
     def __init__(self, config: ConversationConfig, llm_api_key: str):
         if not text:
             raise ValueError("Input text cannot be empty")
         try:
+            prompt = self.config.custom_prompt_template.format(text=text) if self.config.custom_prompt_template else (
+                f"{text}\nConvierte el texto en un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. "
+                f"Genera una conversación extensa y natural con al menos 5 intercambios por hablante. "
+                f"Devuelve SOLO un objeto JSON: "
+                f'{{"conversation": [{{"speaker": "Anfitrión1", "text": "..."}}, {{"speaker": "Anfitrión2", "text": "..."}}]}}'
             )
+            print(f"Texto de entrada: {text[:200]}...")
             response = self.llm_client.chat.completions.create(
                 messages=[{"role": "user", "content": prompt}],
                 model=self.config.model_name,
                 response_format={"type": "json_object"}
             )
             response_content = response.choices[0].message.content
+            print(f"Respuesta cruda del modelo: {response_content[:500]}...")
             json_str = response_content.strip()
             if not json_str.startswith('{'):
                 json_str = json_str[json_str.find('{'):]
         self,
         speech_audio: AudioSegment,
         music_path: str,
+        tags_paths: List[str],
+        custom_music_path: str = None
     ) -> AudioSegment:
+        music_file = custom_music_path if custom_music_path and os.path.exists(custom_music_path) else music_path
+        music = AudioSegment.from_file(music_file).fade_out(2000) - 25
         if len(music) < len(speech_audio):
             music = music * ((len(speech_audio) // len(music)) + 1)
         music = music[:len(speech_audio)]
         mixed = speech_audio.overlay(music)
+        tag_outro = AudioSegment.from_file(tags_paths[0]) - 10
         tag_trans = AudioSegment.from_file(tags_paths[1]) - 10
+        final_audio = mixed + tag_outro
         silent_ranges = []
         for i in range(0, len(speech_audio) - 500, 100):
             chunk = speech_audio[i:i+500]
             if chunk.dBFS < -40:
                 silent_ranges.append((i, i + 500))
         for start, end in reversed(silent_ranges):
             if (end - start) >= len(tag_trans):
                 final_audio = final_audio.overlay(tag_trans, position=start + 50)
         return final_audio
+    async def url_to_audio(self, url: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
+        """Fetch the text behind ``url`` and turn it into a podcast audio file.
+
+        The fetched text is truncated to ``config.max_words`` words, converted
+        into a conversation by ``extract_conversation`` and then rendered by
+        ``_process_to_audio``.
+
+        Args:
+            url: Address handed to ``fetch_text``.
+            voice_1: Voice identifier forwarded to ``_process_to_audio``.
+            voice_2: Voice identifier forwarded to ``_process_to_audio``.
+            custom_music_path: Optional background-music file forwarded to
+                ``_process_to_audio``.
+
+        Returns:
+            The tuple produced by ``_process_to_audio``: the path of the
+            exported MP3 and the conversation transcript.
+        """
         text = self.fetch_text(url)
+        # Cap the input at max_words whitespace-separated words; the kept words
+        # are re-joined with single spaces, so original spacing is not preserved.
         if len(words := text.split()) > self.config.max_words:
             text = " ".join(words[:self.config.max_words])
         conversation = self.extract_conversation(text)
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
+    async def text_to_audio(self, text: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
+        """Turn caller-supplied text into a podcast audio file.
+
+        The text is passed to ``extract_conversation`` unchanged (no
+        ``max_words`` truncation happens here) and the resulting conversation
+        is rendered by ``_process_to_audio``.
+
+        Args:
+            text: Source text for the conversation.
+            voice_1: Voice identifier forwarded to ``_process_to_audio``.
+            voice_2: Voice identifier forwarded to ``_process_to_audio``.
+            custom_music_path: Optional background-music file forwarded to
+                ``_process_to_audio``.
+
+        Returns:
+            The tuple produced by ``_process_to_audio``: the path of the
+            exported MP3 and the conversation transcript.
+        """
         conversation = self.extract_conversation(text)
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
+    async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
+        """Render ``text`` to audio without calling ``extract_conversation``.
+
+        The whole text becomes a single conversation turn attributed to
+        "Anfitrión1", which is then rendered by ``_process_to_audio``.
+
+        Args:
+            text: Text to be spoken as-is.
+            voice_1: Voice identifier forwarded to ``_process_to_audio``.
+            voice_2: Voice identifier forwarded to ``_process_to_audio``.
+                NOTE(review): with a single "Anfitrión1" turn this voice is
+                presumably unused -- confirm against ``text_to_speech``.
+            custom_music_path: Optional background-music file forwarded to
+                ``_process_to_audio``.
+
+        Returns:
+            The tuple produced by ``_process_to_audio``: the path of the
+            exported MP3 and the conversation transcript.
+        """
         conversation = {"conversation": [{"speaker": "Anfitrión1", "text": text}]}
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
     async def _process_to_audio(
         self,
         conversation: Dict,
         voice_1: str,
+        voice_2: str,
+        custom_music_path: str = None
     ) -> Tuple[str, str]:
+        """Synthesize a conversation, mix in music and tags, and export an MP3.
+
+        Args:
+            conversation: Dict with a "conversation" list whose items carry
+                "speaker" and "text" keys.
+            voice_1: Voice identifier forwarded to ``text_to_speech``.
+            voice_2: Voice identifier forwarded to ``text_to_speech``.
+            custom_music_path: Optional background-music file forwarded to
+                ``add_background_music_and_tags``.
+
+        Returns:
+            ``(output_path, text_output)``: the path of the exported
+            "podcast_final.mp3" inside the folder returned by
+            ``text_to_speech``, and a transcript with one "speaker: text"
+            line per turn.
+        """
         audio_files, folder_name = await self.text_to_speech(conversation, voice_1, voice_2)
         combined = self.combine_audio_files(audio_files)
+        # "musica.mp3" is the default music file; "tag.mp3" and "tag2.mp3" are
+        # the tag clips. All three are relative paths.
         final_audio = self.add_background_music_and_tags(
             combined,
             "musica.mp3",
+            ["tag.mp3", "tag2.mp3"],
+            custom_music_path
         )
         output_path = os.path.join(folder_name, "podcast_final.mp3")
         final_audio.export(output_path, format="mp3")
+        # The per-turn audio files are only removed after a successful export;
+        # if an earlier step raises, they are left on disk.
         for f in audio_files:
             os.remove(f)
         text_output = "\n".join(
             f"{turn['speaker']}: {turn['text']}"
             for turn in conversation["conversation"]
         )
         return output_path, text_output
+```
+### Ejemplo de `app.py`
+```python
+from conver import ConversationConfig, URLToAudioConverter
+import asyncio
+async def main():
+    """Example: build a podcast from a short text with a custom prompt and music.
+
+    Creates a ``ConversationConfig`` with a custom prompt template, runs
+    ``URLToAudioConverter.text_to_audio`` and prints the path of the generated
+    audio file together with the dialogue transcript.
+    """
+    # The template is expanded with str.format(text=...), so "{text}" is the
+    # only placeholder; every literal JSON brace must be doubled ("{{" / "}}"),
+    # otherwise format() raises instead of producing the prompt.
+    custom_prompt = (
+        "{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. "
+        "Usa un tono informal y genera al menos 6 intercambios por hablante. "
+        "Devuelve SOLO un objeto JSON: {{\"conversation\": [{{\"speaker\": \"Anfitrión1\", \"text\": \"...\"}}, {{\"speaker\": \"Anfitrión2\", \"text\": \"...\"}}]}}"
+    )
+    config = ConversationConfig(custom_prompt_template=custom_prompt, max_words=5000)
+    converter = URLToAudioConverter(config, "tu_api_key")
+    text = "Discusión sobre inteligencia artificial y su impacto."
+    output_path, text_output = await converter.text_to_audio(
+        text,
+        voice_1="es-ES-AlvaroNeural",
+        voice_2="es-ES-ElviraNeural",
+        custom_music_path="mi_musica.mp3"
+    )
+    print(f"Podcast generado en: {output_path}")
+    print(f"Texto del diálogo:\n{text_output}")
+if __name__ == "__main__":
+    asyncio.run(main())