Spaces:
Sleeping
Sleeping
File size: 3,493 Bytes
6250169 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import os
import requests
from dotenv import load_dotenv
import subprocess
import shutil
import time
from deepgram import Deepgram
# brew install portaudio
# Load environment variables (python-dotenv; presumably from a local .env file).
# This call must run before the os.getenv() lookup below.
load_dotenv()
# Deepgram API key, read from the DEEPGRAM_API_KEY environment variable.
# os.getenv() returns None when the variable is not set.
DG_API_KEY = os.getenv("DEEPGRAM_API_KEY")
# Voice model name. send_tts_request() sends it both as the `model` query
# parameter and as the `voice` field of the JSON payload.
MODEL_NAME = "alpha-stella-en-v2" # Example model name, change as needed
def is_installed(lib_name: str) -> bool:
    """Report whether an executable called *lib_name* can be found on ``PATH``."""
    return shutil.which(lib_name) is not None
def play_stream(audio_stream, use_ffmpeg=True):
    """Pipe an iterable of audio byte chunks into ``ffplay`` for playback.

    The player is started first, every non-empty chunk is written to its
    stdin as soon as it is produced, and the call blocks until the player
    exits.

    Args:
        audio_stream: Iterable yielding ``bytes`` chunks; falsy chunks are
            skipped.
        use_ffmpeg: Not used by this function; kept only so existing callers
            that pass it keep working.

    Raises:
        ValueError: If ``ffplay`` cannot be found on ``PATH``.
    """
    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    # "-" tells ffplay to read its input from stdin.
    player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
    player_process = subprocess.Popen(
        player_command,
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    pipe = player_process.stdin
    try:
        for chunk in audio_stream:
            if chunk:
                pipe.write(chunk)  # type: ignore[union-attr]
                pipe.flush()  # type: ignore[union-attr]
    finally:
        # Always release the pipe and reap the child, even when the stream
        # or a write raised; otherwise the ffplay process is leaked.
        if pipe:
            try:
                pipe.close()
            except BrokenPipeError:
                # The player already exited. Do not let the implicit flush
                # on close() mask an exception that is propagating.
                pass
        player_process.wait()
def send_tts_request(text, timeout=(10, 60)):
    """Request speech for *text* from Deepgram and stream it to ``ffplay``.

    The player process is started before the HTTP request is sent so that
    player start-up does not add to the playback latency. Each received
    chunk is written to the player's stdin immediately, and the time from
    sending the request to receiving the first non-empty chunk is printed.

    Args:
        text: The text to synthesize.
        timeout: ``(connect, read)`` timeout in seconds handed to
            ``requests.post``. The read timeout bounds the wait between
            received bytes, not the total duration of the stream.

    Raises:
        ValueError: If the API key is not configured or ``ffplay`` cannot be
            found on ``PATH``.
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    if not DG_API_KEY:
        # Fail fast instead of sending "Token None" and feeding the
        # resulting error body to the audio player.
        raise ValueError("DEEPGRAM_API_KEY is not set.")

    player = "ffplay"
    if not is_installed(player):
        raise ValueError(f"{player} not found, necessary to stream audio.")

    url = "https://api.beta.deepgram.com/v1/speak"
    params = {
        "model": MODEL_NAME,
        "performance": "some",
        "encoding": "linear16",
        "sample_rate": "24000",
    }
    headers = {
        "Authorization": f"Token {DG_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "text": text,
        "voice": MODEL_NAME,
    }

    # Start the player before the request so it is ready for the first chunk.
    # "-" tells ffplay to read its input from stdin.
    player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
    player_process = subprocess.Popen(
        player_command,
        stdin=subprocess.PIPE,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    pipe = player_process.stdin
    try:
        # Monotonic clock: the interval is unaffected by wall-clock changes.
        start_time = time.perf_counter()
        first_byte_time = None
        with requests.post(
            url,
            params=params,
            stream=True,
            headers=headers,
            json=payload,
            timeout=timeout,
        ) as r:
            # Never pipe an error response body into the player as "audio".
            r.raise_for_status()
            for chunk in r.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                if first_byte_time is None:
                    first_byte_time = time.perf_counter()
                    ttfb = int((first_byte_time - start_time) * 1000)
                    print(f"Time to First Byte (TTFB): {ttfb}ms")
                # Forward each chunk immediately to keep latency low.
                pipe.write(chunk)  # type: ignore[union-attr]
                pipe.flush()  # type: ignore[union-attr]
    finally:
        # Always release the pipe and reap the child, even when the request
        # or a write failed; otherwise the ffplay process is leaked.
        if pipe:
            try:
                pipe.close()
            except BrokenPipeError:
                # The player already exited. Do not let the implicit flush
                # on close() mask an exception that is propagating.
                pass
        player_process.wait()
# Example usage: synthesize one sentence and stream it to the local player.
# Nothing is written to a file.
text = """
The returns for performance are superlinear."""
send_tts_request(text)