gnosticdev committed
Commit 187ec28 · verified · 1 Parent(s): bf07215

Update conver.py

Files changed (1)
  1. conver.py +48 -114
conver.py CHANGED
@@ -10,6 +10,7 @@ import tempfile
from pydub import AudioSegment
import base64
from pathlib import Path
+import hashlib

@dataclass
class ConversationConfig:
@@ -26,86 +27,50 @@ class URLToAudioConverter:
    def fetch_text(self, url: str) -> str:
        if not url:
            raise ValueError("URL cannot be empty")
-
-        full_url = f"{self.config.prefix_url}{url}"
-        try:
-            response = httpx.get(full_url, timeout=60.0)
-            response.raise_for_status()
-            return response.text
-        except httpx.HTTPError as e:
-            raise RuntimeError(f"Failed to fetch URL: {e}")
+        response = httpx.get(f"{self.config.prefix_url}{url}", timeout=60.0)
+        response.raise_for_status()
+        return response.text

    def extract_conversation(self, text: str) -> Dict:
-        if not text:
-            raise ValueError("Input text cannot be empty")
-
-        try:
-            prompt = (
-                f"{text}\nConvert the provided text into a short informative podcast conversation "
-                f"between two experts. Return ONLY a JSON object with the following structure:\n"
-                '{"conversation": [{"speaker": "Speaker1", "text": "..."}, {"speaker": "Speaker2", "text": "..."}]}'
-            )
-
-            chat_completion = self.llm_client.chat.completions.create(
-                messages=[{"role": "user", "content": prompt}],
-                model=self.config.model_name,
-                response_format={"type": "json_object"}
-            )
-
-            response_content = chat_completion.choices[0].message.content
-            json_str = response_content.strip()
-
-            if not json_str.startswith('{'):
-                start = json_str.find('{')
-                if start != -1:
-                    json_str = json_str[start:]
-
-            if not json_str.endswith('}'):
-                end = json_str.rfind('}')
-                if end != -1:
-                    json_str = json_str[:end+1]
-
-            return json.loads(json_str)
-        except Exception as e:
-            print(f"Error in extract_conversation: {str(e)}")
-            print(f"Model response: {response_content}")
-            raise RuntimeError(f"Failed to extract conversation: {str(e)}")
+        prompt = (
+            f"{text}\nConvert the provided text into a short informative podcast conversation "
+            f"between two experts. Return ONLY a JSON object with the following structure:\n"
+            '{"conversation": [{"speaker": "Speaker1", "text": "..."}, {"speaker": "Speaker2", "text": "..."}]}'
+        )
+        chat_completion = self.llm_client.chat.completions.create(
+            messages=[{"role": "user", "content": prompt}],
+            model=self.config.model_name,
+            response_format={"type": "json_object"}
+        )
+        response_content = chat_completion.choices[0].message.content
+        json_str = response_content.strip()
+        if not json_str.startswith("{"):
+            json_str = json_str[json_str.find("{"):]
+        if not json_str.endswith("}"):
+            json_str = json_str[: json_str.rfind("}") + 1]
+        return json.loads(json_str)

    async def text_to_speech(self, conversation_json: Dict, voice_1: str, voice_2: str) -> Tuple[List[str], str]:
        output_dir = Path(self._create_output_directory())
        filenames = []
-
-        try:
-            for i, turn in enumerate(conversation_json["conversation"]):
-                filename = output_dir / f"output_{i}.mp3"
-                voice = voice_1 if i % 2 == 0 else voice_2
-
-                tmp_path, error = await self._generate_audio(turn["text"], voice)
-                if error:
-                    raise RuntimeError(f"Text-to-speech failed: {error}")
-
-                os.rename(tmp_path, filename)
-                filenames.append(str(filename))
-
-            return filenames, str(output_dir)
-        except Exception as e:
-            raise RuntimeError(f"Failed to convert text to speech: {e}")
+        for i, turn in enumerate(conversation_json["conversation"]):
+            voice = voice_1 if i % 2 == 0 else voice_2
+            tmp_path, error = await self._generate_audio(turn["text"], voice)
+            if error:
+                raise RuntimeError(f"Text-to-speech failed: {error}")
+            filename = output_dir / f"output_{i}.mp3"
+            os.rename(tmp_path, filename)
+            filenames.append(str(filename))
+        return filenames, str(output_dir)

    async def _generate_audio(self, text: str, voice: str, rate: int = 0, pitch: int = 0) -> Tuple[str, str]:
-        if not text.strip():
-            return None, "Text cannot be empty"
-        if not voice:
-            return None, "Voice cannot be empty"
-
        voice_short_name = voice.split(" - ")[0]
        rate_str = f"{rate:+d}%"
        pitch_str = f"{pitch:+d}Hz"
        communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
-
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
            await communicate.save(tmp_path)
-
        return tmp_path, None

    def _create_output_directory(self) -> str:
@@ -115,70 +80,39 @@ class URLToAudioConverter:
        return folder_name

    def combine_audio_files(self, filenames: List[str], output_file: str) -> None:
-        if not filenames:
-            raise ValueError("No input files provided")
-
-        try:
-            combined = AudioSegment.empty()
-            for filename in filenames:
-                audio_segment = AudioSegment.from_file(filename, format="mp3")
-                combined += audio_segment
-
-            combined.export(output_file, format="mp3")
-
-            # Cleanup
-            dir_path = os.path.dirname(filenames[0])
-            for file in os.listdir(dir_path):
-                file_path = os.path.join(dir_path, file)
-                if os.path.isfile(file_path):
-                    os.remove(file_path)
-            os.rmdir(dir_path)
-
-        except Exception as e:
-            raise RuntimeError(f"Failed to combine audio files: {e}")
+        combined = AudioSegment.empty()
+        for filename in filenames:
+            combined += AudioSegment.from_file(filename, format="mp3")
+        combined.export(output_file, format="mp3")
+        dir_path = os.path.dirname(filenames[0])
+        for file in os.listdir(dir_path):
+            os.remove(os.path.join(dir_path, file))
+        os.rmdir(dir_path)

    async def url_to_audio(self, url: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
        text = self.fetch_text(url)
-
        words = text.split()
        if len(words) > self.config.max_words:
-            text = " ".join(words[:self.config.max_words])
-
+            text = " ".join(words[: self.config.max_words])
        conversation_json = self.extract_conversation(text)
-        conversation_text = "\n".join(
-            f"{turn['speaker']}: {turn['text']}" for turn in conversation_json["conversation"]
-        )
+        conversation_text = "\n".join(f"{t['speaker']}: {t['text']}" for t in conversation_json["conversation"])
        self.llm_out = conversation_json
-        audio_files, folder_name = await self.text_to_speech(
-            conversation_json, voice_1, voice_2
-        )
-
+        audio_files, folder_name = await self.text_to_speech(conversation_json, voice_1, voice_2)
        final_output = os.path.join(folder_name, "combined_output.mp3")
        self.combine_audio_files(audio_files, final_output)
        return final_output, conversation_text

    async def text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
-        """Normal processing with the LLM"""
        conversation_json = self.extract_conversation(text)
-        conversation_text = "\n".join(
-            f"{turn['speaker']}: {turn['text']}" for turn in conversation_json["conversation"]
-        )
-        audio_files, folder_name = await self.text_to_speech(
-            conversation_json, voice_1, voice_2
-        )
+        conversation_text = "\n".join(f"{t['speaker']}: {t['text']}" for t in conversation_json["conversation"])
+        audio_files, folder_name = await self.text_to_speech(conversation_json, voice_1, voice_2)
        final_output = os.path.join(folder_name, "combined_output.mp3")
        self.combine_audio_files(audio_files, final_output)
        return final_output, conversation_text

    async def raw_text_to_audio(self, text: str, voice_1: str, voice_2: str) -> Tuple[str, str]:
-        """NEW: LLM-free mode (direct text)"""
-        conversation = {
-            "conversation": [
-                {"speaker": "Host", "text": text},
-                {"speaker": "Co-host", "text": "(Continuation of the topic)"}
-            ]
-        }
-        audio_files, folder_name = await self.text_to_speech(conversation, voice_1, voice_2)
-        output_file = os.path.join(folder_name, "raw_podcast.mp3")
-        self.combine_audio_files(audio_files, output_file)
-        return text, output_file
+        hash_name = hashlib.md5(text.encode()).hexdigest()[:8]
+        output_file = f"podcast_{hash_name}.mp3"
+        communicate = edge_tts.Communicate(text, voice_1.split(" - ")[0])
+        await communicate.save(output_file)
+        return text, output_file
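For context, a minimal usage sketch of the updated methods (not part of the commit): it assumes a URLToAudioConverter instance already constructed with a ConversationConfig and an OpenAI-style llm_client (neither the constructor nor the config fields appear in this diff), and voice strings in the "ShortName - description" form that _generate_audio splits on; the URL and voice values below are illustrative only.

async def demo(converter):
    # LLM-driven path: fetch the page, truncate to config.max_words, ask the LLM
    # for a two-speaker conversation, synthesize each turn and merge the clips.
    final_mp3, transcript = await converter.url_to_audio(
        "https://example.com/article",                 # illustrative URL
        voice_1="en-US-GuyNeural - en-US (Male)",      # illustrative voice strings
        voice_2="en-US-JennyNeural - en-US (Female)",
    )

    # New LLM-free path added in this commit: a single edge-tts pass over the raw
    # text, saved as podcast_<md5 prefix>.mp3; note the (text, path) return order
    # and that voice_2 is accepted but unused in this mode.
    text, raw_mp3 = await converter.raw_text_to_audio(
        "Plain text to narrate directly.",
        voice_1="en-US-GuyNeural - en-US (Male)",
        voice_2="en-US-JennyNeural - en-US (Female)",
    )
    return final_mp3, raw_mp3

# Run with asyncio.run(demo(converter)) once a converter instance exists.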