sreepathi-ravikumar committed on
Commit
e9a4e37
·
verified ·
1 Parent(s): b155fe7

Update audio_generator.py

Browse files
Files changed (1) hide show
  1. audio_generator.py +39 -33
audio_generator.py CHANGED
@@ -1,34 +1,40 @@
 
 
 
 
 
 
 
1
  import os
2
- import edge_tts
3
- import asyncio
4
- import logging
5
- from datetime import datetime
6
-
7
# (Removed in this commit) Module-level logging setup for the edge-tts version.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
9
-
10
# (Removed in this commit) Old async TTS helper based on edge-tts.
async def _generate_speech(text: str, output_path: str) -> str:
    """Synthesize *text* with edge-tts and save it to *output_path*.

    Uses the "en-US-AriaNeural" voice with default rate/volume.

    Returns:
        The *output_path* it was given, once the file is written.

    Raises:
        RuntimeError: wrapping any failure from edge-tts (logged first).
    """
    try:
        communicate = edge_tts.Communicate(
            text=text,
            voice="en-US-AriaNeural",
            rate="+0%",     # no speed adjustment
            volume="+0%"    # no volume adjustment
        )
        # edge-tts streams the synthesized audio straight to disk.
        await communicate.save(output_path)
        return output_path
    except Exception as e:
        logger.error(f"Generation failed: {str(e)}")
        raise RuntimeError(f"Audio generation error: {str(e)}")
23
-
24
# (Removed in this commit) Old synchronous entry point for the edge-tts version.
def generate_audio(text: str) -> str:
    """Main entry point for audio generation.

    Writes a timestamped MP3 under "tts_outputs/" and returns its path.
    Bridges into the async helper with asyncio.run, so it must not be
    called from inside a running event loop.

    Raises:
        Re-raises any failure after logging it (RuntimeError from the
        helper, or OS errors from directory creation).
    """
    try:
        os.makedirs("tts_outputs", exist_ok=True)
        # Timestamped filename avoids collisions between successive calls.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_path = os.path.join("tts_outputs", f"tts_{timestamp}.mp3")

        return asyncio.run(_generate_speech(text, output_path))
    except Exception as e:
        logger.error(f"Audio generation failed: {str(e)}")
        raise
 
1
+ # audio_generation.py
2
+
3
+ from transformers import AutoProcessor, BarkModel
4
+ import torch
5
+ import numpy as np
6
+ from scipy.io.wavfile import write as write_wav
7
+ from pydub import AudioSegment
8
  import os
9
+ import uuid
10
+
11
# Load the Bark text-to-speech model and its processor once at import time.
# NOTE(review): this downloads/loads a large checkpoint as a module side
# effect — every import of this module pays that cost.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")

# Run on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
16
+
17
def split_text(text, max_len=150):
    """Break *text* into consecutive pieces of at most *max_len* characters.

    Splits purely by position (words may be cut mid-way). Returns an empty
    list for empty input; the last piece may be shorter than *max_len*.
    """
    pieces = []
    start = 0
    while start < len(text):
        pieces.append(text[start:start + max_len])
        start += max_len
    return pieces
19
+
20
def generate_audio(text, output_dir="audios"):
    """Synthesize *text* with Bark and return the path of one combined WAV file.

    The text is cut into <=150-character chunks (Bark handles short prompts
    best), each chunk is generated separately, and the pieces are joined
    into a single file.

    Args:
        text: Text to synthesize.
        output_dir: Directory for temp and final files (created if missing).

    Returns:
        Path to the concatenated WAV file inside *output_dir*.
    """
    os.makedirs(output_dir, exist_ok=True)
    chunks = split_text(text)
    final_audio = AudioSegment.empty()

    # Bark produces 24 kHz audio; writing the samples with a wrong rate
    # (the previous hard-coded 22050) shifts pitch and duration on playback.
    # Prefer the model's own configured rate, falling back to Bark's default.
    sample_rate = getattr(model.generation_config, "sample_rate", 24000)

    for chunk in chunks:
        inputs = processor(chunk, return_tensors="pt").to(device)
        with torch.no_grad():  # inference only — no autograd graph needed
            audio_tensor = model.generate(**inputs)
        audio_array = audio_tensor.cpu().numpy().squeeze()

        # Peak-normalize, guarding against an all-silent chunk: dividing by a
        # zero peak would produce NaNs and corrupt the WAV data.
        peak = float(np.max(np.abs(audio_array))) if audio_array.size else 0.0
        if peak > 0:
            audio_array = audio_array / peak

        # Convert float [-1, 1] samples to 16-bit PCM. Float WAVs are not
        # readable by the stdlib `wave`-based path pydub uses without ffmpeg.
        pcm = (audio_array * 32767).astype(np.int16)

        temp_path = os.path.join(output_dir, f"{uuid.uuid4()}.wav")
        write_wav(temp_path, rate=sample_rate, data=pcm)
        try:
            final_audio += AudioSegment.from_wav(temp_path)
        finally:
            # Remove the per-chunk temp file even if reading it fails.
            os.remove(temp_path)

    final_filename = os.path.join(output_dir, f"{uuid.uuid4()}_final.wav")
    final_audio.export(final_filename, format="wav")
    return final_filename