gnosticdev committed · verified
Commit 9e11cdd · Parent: a2a2610

Update conver.py

Files changed (1): conver.py (+51 −26)
conver.py CHANGED
@@ -17,6 +17,7 @@ class ConversationConfig:
     max_words: int = 3000
     prefix_url: str = "https://r.jina.ai/"
     model_name: str = "meta-llama/Llama-3-8b-chat-hf"
+    custom_prompt_template: str = None
 
 class URLToAudioConverter:
     def __init__(self, config: ConversationConfig, llm_api_key: str):
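
The new `custom_prompt_template` field is consumed with `str.format(text=...)` in the second hunk below, which carries one subtlety: any literal braces in a template (for example the JSON skeleton the prompt demands) must be doubled, or `format` will treat them as replacement fields. A minimal sketch of that contract, using only the standard library (the template text here is illustrative, not taken from the repo):

```python
# Only {text} is a real placeholder; doubled braces render as literal JSON braces.
template = (
    "{text}\nDevuelve SOLO un objeto JSON: "
    '{{"conversation": [{{"speaker": "Anfitrión1", "text": "..."}}]}}'
)
print(template.format(text="Texto de ejemplo"))
# Texto de ejemplo
# Devuelve SOLO un objeto JSON: {"conversation": [{"speaker": "Anfitrión1", "text": "..."}]}
```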
@@ -56,21 +57,20 @@ class URLToAudioConverter:
         if not text:
             raise ValueError("Input text cannot be empty")
         try:
-            prompt = (
-                f"{text}\nConvierte el texto proporcionado en un diálogo de podcast en español "
-                f"entre Anfitrión1 y Anfitrión2. Genera una conversación extensa, detallada y natural, "
-                f"como en un podcast real, con al menos 5 intercambios por hablante. "
-                f"Devuelve SOLO un objeto JSON con la siguiente estructura:\n"
-                '{"conversation": [{"speaker": "Anfitrión1", "text": "..."}, {"speaker": "Anfitrión2", "text": "..."}]}'
+            prompt = self.config.custom_prompt_template.format(text=text) if self.config.custom_prompt_template else (
+                f"{text}\nConvierte el texto en un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. "
+                f"Genera una conversación extensa y natural con al menos 5 intercambios por hablante. "
+                f"Devuelve SOLO un objeto JSON: "
+                f'{{"conversation": [{{"speaker": "Anfitrión1", "text": "..."}}, {{"speaker": "Anfitrión2", "text": "..."}}]}}'
             )
-            print(f"Texto de entrada: {text[:200]}...")  # Debugging
+            print(f"Texto de entrada: {text[:200]}...")
             response = self.llm_client.chat.completions.create(
                 messages=[{"role": "user", "content": prompt}],
                 model=self.config.model_name,
                 response_format={"type": "json_object"}
             )
             response_content = response.choices[0].message.content
-            print(f"Respuesta cruda del modelo: {response_content[:500]}...")  # Debugging
+            print(f"Respuesta cruda del modelo: {response_content[:500]}...")
             json_str = response_content.strip()
             if not json_str.startswith('{'):
                 json_str = json_str[json_str.find('{'):]
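
The trim-to-first-brace step above exists because chat models often wrap the requested JSON in prose. A self-contained sketch of that salvage-and-parse path (the helper name `parse_model_json` is mine, not part of conver.py):

```python
import json

def parse_model_json(response_content: str) -> dict:
    # Mirrors the logic above: drop any prose before the opening brace, then parse.
    json_str = response_content.strip()
    if not json_str.startswith('{'):
        # Note: find() returns -1 when no brace exists; production code may
        # want to raise instead of slicing in that case.
        json_str = json_str[json_str.find('{'):]
    return json.loads(json_str)

print(parse_model_json('Claro, aquí tienes: {"conversation": []}'))
# {'conversation': []}
```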
@@ -132,66 +132,91 @@ class URLToAudioConverter:
         self,
         speech_audio: AudioSegment,
         music_path: str,
-        tags_paths: List[str]
+        tags_paths: List[str],
+        custom_music_path: str = None
     ) -> AudioSegment:
-        music = AudioSegment.from_file(music_path).fade_out(2000) - 25
+        music_file = custom_music_path if custom_music_path and os.path.exists(custom_music_path) else music_path
+        music = AudioSegment.from_file(music_file).fade_out(2000) - 25
         if len(music) < len(speech_audio):
             music = music * ((len(speech_audio) // len(music)) + 1)
         music = music[:len(speech_audio)]
         mixed = speech_audio.overlay(music)
-
-        tag_intro = AudioSegment.from_file(tags_paths[0]) - 10
+        tag_outro = AudioSegment.from_file(tags_paths[0]) - 10
         tag_trans = AudioSegment.from_file(tags_paths[1]) - 10
-        final_audio = tag_intro + mixed
-
+        final_audio = mixed + tag_outro
         silent_ranges = []
         for i in range(0, len(speech_audio) - 500, 100):
             chunk = speech_audio[i:i+500]
             if chunk.dBFS < -40:
                 silent_ranges.append((i, i + 500))
-
         for start, end in reversed(silent_ranges):
             if (end - start) >= len(tag_trans):
                 final_audio = final_audio.overlay(tag_trans, position=start + 50)
-
         return final_audio
 
-    async def url_to_audio(self, url: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
+    async def url_to_audio(self, url: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
         text = self.fetch_text(url)
         if len(words := text.split()) > self.config.max_words:
             text = " ".join(words[:self.config.max_words])
         conversation = self.extract_conversation(text)
-        return await self._process_to_audio(conversation, voice_1, voice_2)
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
 
-    async def text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
+    async def text_to_audio(self, text: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
         conversation = self.extract_conversation(text)
-        return await self._process_to_audio(conversation, voice_1, voice_2)
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
 
-    async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
+    async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str, custom_music_path: str = None) -> Tuple[str, str]:
         conversation = {"conversation": [{"speaker": "Anfitrión1", "text": text}]}
-        return await self._process_to_audio(conversation, voice_1, voice_2)
+        return await self._process_to_audio(conversation, voice_1, voice_2, custom_music_path)
 
     async def _process_to_audio(
         self,
         conversation: Dict,
         voice_1: str,
-        voice_2: str
+        voice_2: str,
+        custom_music_path: str = None
     ) -> Tuple[str, str]:
         audio_files, folder_name = await self.text_to_speech(conversation, voice_1, voice_2)
         combined = self.combine_audio_files(audio_files)
         final_audio = self.add_background_music_and_tags(
             combined,
             "musica.mp3",
-            ["tag.mp3", "tag2.mp3"]
+            ["tag.mp3", "tag2.mp3"],
+            custom_music_path
         )
         output_path = os.path.join(folder_name, "podcast_final.mp3")
         final_audio.export(output_path, format="mp3")
-
         for f in audio_files:
             os.remove(f)
-
         text_output = "\n".join(
             f"{turn['speaker']}: {turn['text']}"
             for turn in conversation["conversation"]
        )
         return output_path, text_output
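
The mixing logic in this hunk is dense enough to deserve an isolated look: the music bed is attenuated by 25 dB, looped until it outlasts the speech, trimmed, and overlaid; silence is then found by scanning 500 ms windows every 100 ms for audio below -40 dBFS. A standalone sketch of just that part, assuming pydub with an ffmpeg binary on PATH (`speech.mp3` is a hypothetical input; `musica.mp3` is the bed the repo references):

```python
from pydub import AudioSegment

speech = AudioSegment.from_file("speech.mp3")                     # hypothetical input
music = AudioSegment.from_file("musica.mp3").fade_out(2000) - 25  # duck bed by 25 dB

# Loop the bed until it covers the speech, then trim to the exact length.
if len(music) < len(speech):
    music = music * ((len(speech) // len(music)) + 1)
mixed = speech.overlay(music[:len(speech)])

# 500 ms windows every 100 ms; below -40 dBFS counts as a gap where a
# transition tag could later be overlaid.
silent_ranges = [
    (i, i + 500)
    for i in range(0, len(speech) - 500, 100)
    if speech[i:i + 500].dBFS < -40
]
print(f"{len(silent_ranges)} candidate gaps for transition tags")
```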
### Example `app.py`

```python
from conver import ConversationConfig, URLToAudioConverter
import asyncio

async def main():
    # Literal JSON braces are doubled ({{ }}) so that
    # custom_prompt_template.format(text=...) does not treat them as
    # replacement fields; only {text} is a real placeholder.
    custom_prompt = (
        "{text}\nCrea un diálogo de podcast en español entre Anfitrión1 y Anfitrión2. "
        "Usa un tono informal y genera al menos 6 intercambios por hablante. "
        'Devuelve SOLO un objeto JSON: {{"conversation": [{{"speaker": "Anfitrión1", "text": "..."}}, '
        '{{"speaker": "Anfitrión2", "text": "..."}}]}}'
    )
    config = ConversationConfig(custom_prompt_template=custom_prompt, max_words=5000)
    converter = URLToAudioConverter(config, "tu_api_key")
    text = "Discusión sobre inteligencia artificial y su impacto."
    output_path, text_output = await converter.text_to_audio(
        text,
        voice_1="es-ES-AlvaroNeural",
        voice_2="es-ES-ElviraNeural",
        custom_music_path="mi_musica.mp3"
    )
    print(f"Podcast generado en: {output_path}")
    print(f"Texto del diálogo:\n{text_output}")

if __name__ == "__main__":
    asyncio.run(main())
```
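
A runtime note on the example: pydub decodes and exports mp3 through an external ffmpeg (or libav) binary, so one must be on PATH, and the assets referenced by conver.py (`musica.mp3`, `tag.mp3`, `tag2.mp3`, plus the optional `mi_musica.mp3` passed above) are assumed to sit in the working directory.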