gnosticdev committed
Commit 2247844 · verified · 1 Parent(s): 56f8366

Update conver.py

Files changed (1)
  1. conver.py +31 -14
conver.py CHANGED
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Tuple, Dict, Optional
+from typing import List, Tuple, Dict
 import os
 import json
 import httpx
@@ -14,7 +14,7 @@ from pathlib import Path
 class ConversationConfig:
     max_words: int = 3000
     prefix_url: str = "https://r.jina.ai/"
-    model_name: str = "meta-llama/Llama-3-8b-chat-hf"
+    model_name: str = "meta-llama/Llama-3-8b-chat-hf"  # Serverless model
 
 class URLToAudioConverter:
     def __init__(self, config: ConversationConfig, llm_api_key: str):
@@ -34,23 +34,39 @@ class URLToAudioConverter:
             raise RuntimeError(f"Failed to fetch URL: {e}")
 
     def extract_conversation(self, text: str) -> Dict:
+        """Version that parses 'Host1: text' -> JSON"""
         if not text:
             raise ValueError("Input text cannot be empty")
+
+        prompt = (
+            f"{text}\nCreate a podcast dialogue between Host1 and Host2. "
+            "Use EXACTLY this format:\n\n"
+            "Host1: [message]\nHost2: [reply]\nHost1: [response]..."
+        )
+
         try:
-            prompt = (
-                f"{text}\nConvert this text into a podcast conversation between two hosts. "
-                "Return ONLY JSON with this structure:\n"
-                '{"conversation": [{"speaker": "Host1", "text": "..."}, {"speaker": "Host2", "text": "..."}]}'
-            )
             response = self.llm_client.chat.completions.create(
                 messages=[{"role": "user", "content": prompt}],
                 model=self.config.model_name,
-                response_format={"type": "json_object"}
+                temperature=0.7
             )
-            json_str = response.choices[0].message.content.strip()
-            return json.loads(json_str[json_str.find('{'):json_str.rfind('}')+1])
+            raw_dialogue = response.choices[0].message.content
+
+            # Safe parsing of the expected format
+            conversation = {"conversation": []}
+            for line in raw_dialogue.split('\n'):
+                if ':' in line:
+                    speaker, _, content = line.partition(':')
+                    if speaker.strip() in ("Host1", "Host2"):
+                        conversation["conversation"].append({
+                            "speaker": speaker.strip(),
+                            "text": content.strip()
+                        })
+
+            return conversation
+
         except Exception as e:
-            raise RuntimeError(f"Failed to extract conversation: {str(e)}")
+            raise RuntimeError(f"Failed to parse dialogue: {str(e)}")
 
     async def text_to_speech(self, conversation_json: Dict, voice_1: str, voice_2: str) -> Tuple[List[str], str]:
         output_dir = Path(self._create_output_directory())
@@ -100,7 +116,7 @@ class URLToAudioConverter:
     ) -> AudioSegment:
         music = AudioSegment.from_file(music_path).fade_out(2000) - 25
         if len(music) < len(speech_audio):
-            music = music * ((len(speech_audio) // len(music)) + 1)
+            music = music * (len(speech_audio) // len(music) + 1)
         music = music[:len(speech_audio)]
         mixed = speech_audio.overlay(music)
 
@@ -108,6 +124,7 @@ class URLToAudioConverter:
         tag_trans = AudioSegment.from_file(tags_paths[1]) - 10
         final_audio = tag_intro + mixed
 
+        # Insert tags in silences >500 ms
         silent_ranges = []
        for i in range(0, len(speech_audio) - 500, 100):
             chunk = speech_audio[i:i+500]
@@ -132,7 +149,7 @@ class URLToAudioConverter:
         return await self._process_to_audio(conversation, voice_1, voice_2)
 
     async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
-        conversation = {"conversation": [{"speaker": "Narrator", "text": text}]}
+        conversation = {"conversation": [{"speaker": "Host1", "text": text}]}
         return await self._process_to_audio(conversation, voice_1, voice_2)
 
     async def _process_to_audio(
@@ -148,7 +165,7 @@ class URLToAudioConverter:
             "musica.mp3",
             ["tag.mp3", "tag2.mp3"]
         )
-        output_path = os.path.join(folder_name, "output.mp3")
+        output_path = os.path.join(folder_name, "podcast_final.mp3")
         final_audio.export(output_path, format="mp3")
 
         for f in audio_files:
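
With this change, extract_conversation no longer asks the model to return JSON; it prompts for plain "Host1:"/"Host2:" lines and builds the conversation dict itself. Below is a minimal standalone sketch of that parsing step run on a made-up model reply (sample_reply is hypothetical, not real model output):

# Hypothetical sample of the "Host1: ... / Host2: ..." format the prompt requests
sample_reply = (
    "Host1: Welcome to the show!\n"
    "Host2: Thanks, glad to be here.\n"
    "Note: lines without a Host1/Host2 prefix are ignored.\n"
    "Host1: Let's dive into today's article."
)

conversation = {"conversation": []}
for line in sample_reply.split('\n'):
    if ':' in line:
        speaker, _, content = line.partition(':')
        if speaker.strip() in ("Host1", "Host2"):
            conversation["conversation"].append({
                "speaker": speaker.strip(),
                "text": content.strip()
            })

print(conversation)
# {'conversation': [{'speaker': 'Host1', 'text': 'Welcome to the show!'}, ...]}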
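
The background-music hunk relies on AudioSegment operator overloads (assuming AudioSegment here is pydub's): subtracting a number lowers gain in dB, and multiplying a segment by an integer repeats it. A small self-contained sketch of that loop-and-trim logic, using silent segments as stand-ins for musica.mp3 and the speech track:

from pydub import AudioSegment

speech = AudioSegment.silent(duration=10_000)      # stand-in for the generated speech track
music = AudioSegment.silent(duration=3_000) - 25   # stand-in for musica.mp3, 25 dB quieter

if len(music) < len(speech):
    # Repeat the clip enough times to cover the speech, then trim to length
    music = music * (len(speech) // len(music) + 1)
music = music[:len(speech)]

mixed = speech.overlay(music)
print(len(mixed))  # 10000 ms, same duration as the speech track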