siddhartharya committed on
Commit
45aea5e
1 Parent(s): 7fbecd3

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +22 -12
utils.py CHANGED
@@ -8,16 +8,13 @@ import re
8
  import tempfile
9
  import requests
10
  from bs4 import BeautifulSoup
11
- from TTS.api import TTS
12
- import torch
 
13
 
14
  groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
15
  tokenizer = tiktoken.get_encoding("cl100k_base")
16
 
17
- # Initialize TTS models
18
- tts_maria = TTS("tts_models/en/ljspeech/tacotron2-DDC")
19
- tts_sarah = TTS("tts_models/en/ljspeech/glow-tts")
20
-
21
  class DialogueItem(BaseModel):
22
  speaker: Literal["Maria", "Sarah"]
23
  text: str
@@ -99,10 +96,23 @@ def generate_script(system_prompt: str, input_text: str, tone: str, target_lengt
99
 
100
  return dialogue
101
 
102
- def generate_audio(text: str, speaker: str) -> str:
 
 
 
 
 
 
 
 
 
103
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
104
- if speaker == "Maria":
105
- tts_maria.tts_to_file(text=text, file_path=temp_audio.name)
106
- else: # Sarah
107
- tts_sarah.tts_to_file(text=text, file_path=temp_audio.name)
108
- return temp_audio.name
 
 
 
 
 
8
  import tempfile
9
  import requests
10
  from bs4 import BeautifulSoup
11
+ import subprocess
12
+ import pyttsx3
13
+ from pydub import AudioSegment
14
 
15
  groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
16
  tokenizer = tiktoken.get_encoding("cl100k_base")
17
 
 
 
 
 
18
  class DialogueItem(BaseModel):
19
  speaker: Literal["Maria", "Sarah"]
20
  text: str
 
96
 
97
  return dialogue
98
 
99
def generate_audio_espeak(text: str, speaker: str) -> str:
    """Synthesize *text* to a WAV file using the ``espeak-ng`` command-line tool.

    Args:
        text: The line of dialogue to speak.
        speaker: ``"Maria"`` selects the ``en-us+f3`` voice; any other value
            selects ``en-gb+f3``.

    Returns:
        Path of the temporary ``.wav`` file that was written. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it.

    Raises:
        OSError: If ``espeak-ng`` cannot be started (for example, it is not
            installed).
        subprocess.CalledProcessError: If ``espeak-ng`` exits with a non-zero
            status.
    """
    import os  # local import: only needed to clean up after a failed run

    voice = "en-us+f3" if speaker == "Maria" else "en-gb+f3"

    # Reserve a unique path, then close our own handle before the external
    # process writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name

    try:
        # check=True turns a non-zero exit status into an exception so the
        # caller can fall back to another engine instead of receiving the
        # path of an empty file.
        subprocess.run(
            ["espeak-ng", "-v", voice, "-w", wav_path, text],
            check=True,
        )
    except Exception:
        # Do not leak the placeholder file when synthesis failed.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise
    return wav_path
104
+
105
def generate_audio_pyttsx3(text: str, speaker: str) -> str:
    """Synthesize *text* to a WAV file using the ``pyttsx3`` engine.

    Args:
        text: The line of dialogue to speak.
        speaker: ``"Maria"`` prefers the engine's voice at index 1; any other
            value uses the voice at index 0.

    Returns:
        Path of the temporary ``.wav`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.
    """
    engine = pyttsx3.init()
    voices = engine.getProperty('voices') or []

    preferred_index = 1 if speaker == "Maria" else 0
    if voices:
        # Some installations expose only one voice; use the first one rather
        # than raising IndexError when the preferred index is missing.
        if preferred_index < len(voices):
            chosen = voices[preferred_index]
        else:
            chosen = voices[0]
        engine.setProperty('voice', chosen.id)

    # Reserve a unique path and close our handle before the engine writes.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name

    engine.save_to_file(text, wav_path)
    # save_to_file only queues the request; runAndWait processes the queue.
    engine.runAndWait()
    return wav_path
113
+
114
def generate_audio(text: str, speaker: str) -> str:
    """Synthesize *text* to a WAV file, preferring espeak-ng over pyttsx3.

    ``generate_audio_espeak`` is tried first. If it raises, or if the file it
    returns is empty, the failure is logged and ``generate_audio_pyttsx3`` is
    used instead.

    Args:
        text: The line of dialogue to speak.
        speaker: Speaker name, passed through unchanged to the backend.

    Returns:
        Path of the generated ``.wav`` file.
    """
    import logging
    import os

    log = logging.getLogger(__name__)
    try:
        wav_path = generate_audio_espeak(text, speaker)
        if os.path.getsize(wav_path) > 0:
            return wav_path
        # A zero-byte file means espeak-ng wrote nothing; discard it and
        # fall through to the second engine.
        os.unlink(wav_path)
        log.warning("espeak-ng produced no audio; falling back to pyttsx3")
    except Exception:
        # Deliberately broad: any espeak failure should trigger the fallback,
        # but record the reason instead of hiding it.
        log.warning(
            "espeak-ng synthesis failed; falling back to pyttsx3",
            exc_info=True,
        )
    return generate_audio_pyttsx3(text, speaker)