# Matt-CB's picture
# v0.1
# 6250169
import os
import requests
from dotenv import load_dotenv
import subprocess
import shutil
import time
from deepgram import Deepgram
# brew install portaudio
# Load environment variables before any of them are read below.
load_dotenv()

# Deepgram API key (read from the environment) and the voice model to request.
DG_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
MODEL_NAME = "alpha-stella-en-v2"  # Example model name, change as needed
def is_installed(lib_name: str) -> bool:
    """Return True when an executable called *lib_name* can be located.

    The lookup is delegated to ``shutil.which``; a ``None`` result means the
    executable was not found.
    """
    return shutil.which(lib_name) is not None
def play_stream(audio_stream, use_ffmpeg=True):
    """Play an audio byte stream by piping it into an ``ffplay`` subprocess.

    Args:
        audio_stream: Iterable yielding chunks of audio bytes. Falsy chunks
            (e.g. empty byte strings) are skipped.
        use_ffmpeg: Currently unused; retained so existing callers that pass
            it keep working.

    Raises:
        ValueError: If ``ffplay`` is not installed.
    """
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    # "-" tells ffplay to read its input from stdin; "-autoexit" makes it quit
    # when playback ends; "-nodisp" disables the display window.
    player_process = subprocess.Popen(
        [player, "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    try:
        for chunk in audio_stream:
            if chunk:
                # Flush after every chunk so playback can begin immediately
                # instead of waiting for Python's pipe buffer to fill.
                player_process.stdin.write(chunk)  # type: ignore
                player_process.stdin.flush()  # type: ignore
    finally:
        # Always release the pipe and reap the child, even when the stream or
        # a write raised; otherwise the ffplay process is left behind.
        if player_process.stdin:
            try:
                player_process.stdin.close()
            except BrokenPipeError:
                # The player already exited; closing re-attempts a flush that
                # cannot succeed. Nothing more to clean up on the pipe.
                pass
        player_process.wait()
def send_tts_request(text):
    """Request speech for *text* from Deepgram and play it while it streams.

    The ``ffplay`` process is started before the HTTP request is sent so that
    audio chunks can be written to it as soon as they arrive. The time from
    sending the request to receiving the first non-empty chunk is printed as
    "Time to First Byte (TTFB)".

    Args:
        text: The text to synthesize.

    Raises:
        ValueError: If ``ffplay`` is not installed.
        requests.HTTPError: If the API answers with an error status.
    """
    url = f"https://api.beta.deepgram.com/v1/speak?model={MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"
    headers = {
        "Authorization": f"Token {DG_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "text": text,
        "voice": MODEL_NAME,
    }

    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    # Launch the player up front, before any audio is received.
    # "-" reads input from stdin; "-autoexit" quits when playback ends;
    # "-nodisp" disables the display window.
    player_process = subprocess.Popen(
        [player, "-autoexit", "-", "-nodisp"],
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    try:
        # Monotonic clock: the measured interval cannot be skewed by
        # wall-clock adjustments.
        start_time = time.perf_counter()
        first_byte_time = None

        with requests.post(url, stream=True, headers=headers, json=payload) as r:
            # Fail loudly on an error response instead of feeding the error
            # body to the player as if it were audio.
            r.raise_for_status()

            for chunk in r.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                if first_byte_time is None:
                    first_byte_time = time.perf_counter()
                    ttfb = int((first_byte_time - start_time) * 1000)
                    print(f"Time to First Byte (TTFB): {ttfb}ms")
                # Write and flush each chunk immediately so playback starts
                # without waiting for the full response.
                player_process.stdin.write(chunk)  # type: ignore
                player_process.stdin.flush()  # type: ignore
    finally:
        # Always close the pipe and reap the child, even if the request or a
        # write failed; otherwise the ffplay process is left behind.
        if player_process.stdin:
            try:
                player_process.stdin.close()
            except BrokenPipeError:
                # The player already exited; closing re-attempts a flush that
                # cannot succeed.
                pass
        player_process.wait()
# Example usage: synthesize one sentence and play it back as it streams.
text = "\nThe returns for performance are superlinear."
send_tts_request(text)