Spaces:
Sleeping
Sleeping
import os | |
import requests | |
from dotenv import load_dotenv | |
import subprocess | |
import shutil | |
import time | |
from deepgram import Deepgram | |
# brew install portaudio | |
# Load environment variables | |
load_dotenv() | |
# Set your Deepgram API Key and desired voice model | |
DG_API_KEY = os.getenv("DEEPGRAM_API_KEY") | |
MODEL_NAME = "alpha-stella-en-v2" # Example model name, change as needed | |
def is_installed(lib_name: str) -> bool: | |
lib = shutil.which(lib_name) | |
return lib is not None | |
def play_stream(audio_stream, use_ffmpeg=True): | |
player = "ffplay" | |
if not is_installed(player): | |
raise ValueError(f"{player} not found, necessary to stream audio.") | |
player_command = ["ffplay", "-autoexit", "-", "-nodisp"] | |
player_process = subprocess.Popen( | |
player_command, | |
stdin=subprocess.PIPE, | |
stdout=subprocess.DEVNULL, | |
stderr=subprocess.DEVNULL, | |
) | |
for chunk in audio_stream: | |
if chunk: | |
player_process.stdin.write(chunk) # type: ignore | |
player_process.stdin.flush() # type: ignore | |
if player_process.stdin: | |
player_process.stdin.close() | |
player_process.wait() | |
def send_tts_request(text): | |
DEEPGRAM_URL = f"https://api.beta.deepgram.com/v1/speak?model={MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000" | |
headers = { | |
"Authorization": f"Token {DG_API_KEY}", | |
"Content-Type": "application/json" | |
} | |
payload = { | |
"text": text, | |
"voice": MODEL_NAME | |
} | |
start_time = time.time() # Record the time before sending the request | |
first_byte_time = None # Initialize a variable to store the time when the first byte is received | |
# Initialize the player process here, before receiving the stream | |
player = "ffplay" | |
if not is_installed(player): | |
raise ValueError(f"{player} not found, necessary to stream audio.") | |
player_command = ["ffplay", "-autoexit", "-", "-nodisp"] | |
player_process = subprocess.Popen( | |
player_command, | |
stdin=subprocess.PIPE, | |
stdout=subprocess.DEVNULL, | |
stderr=subprocess.DEVNULL, | |
) | |
start_time = time.time() # Record the time before sending the request | |
first_byte_time = None # Initialize a variable to store the time when the first byte is received | |
with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r: | |
# dg_performance_total_ms = r.headers.get('x-dg-performance-total-ms', 'Not Available') | |
# print(f"Deepgram Performance Total (ms): {dg_performance_total_ms}ms") | |
for chunk in r.iter_content(chunk_size=1024): | |
if chunk: | |
if first_byte_time is None: # Check if this is the first chunk received | |
first_byte_time = time.time() # Record the time when the first byte is received | |
ttfb = int((first_byte_time - start_time)*1000) # Calculate the time to first byte | |
print(f"Time to First Byte (TTFB): {ttfb}ms") | |
# Write each chunk to the player's stdin immediately | |
player_process.stdin.write(chunk) # type: ignore | |
player_process.stdin.flush() # type: ignore | |
# Close the player's stdin and wait for the process to finish | |
if player_process.stdin: | |
player_process.stdin.close() | |
player_process.wait() | |
# Example usage with saving to file | |
text = """ | |
The returns for performance are superlinear.""" | |
send_tts_request(text) |