Spaces:

Matt-CB
/

Language_AI_Practice

Sleeping

App Files Files Community

Language_AI_Practice / building_blocks /text_to_speech.py

Matt-CB

v0.1

6250169 over 1 year ago

raw

history blame

3.49 kB

	import os
	import requests
	from dotenv import load_dotenv
	import subprocess
	import shutil
	import time
	from deepgram import Deepgram

	# brew install portaudio

	# Load environment variables
	load_dotenv()

	# Set your Deepgram API Key and desired voice model
	DG_API_KEY = os.getenv("DEEPGRAM_API_KEY")
	MODEL_NAME = "alpha-stella-en-v2" # Example model name, change as needed

	def is_installed(lib_name: str) -> bool:
	lib = shutil.which(lib_name)
	return lib is not None

	def play_stream(audio_stream, use_ffmpeg=True):
	player = "ffplay"
	if not is_installed(player):
	raise ValueError(f"{player} not found, necessary to stream audio.")

	player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
	player_process = subprocess.Popen(
	player_command,
	stdin=subprocess.PIPE,
	stdout=subprocess.DEVNULL,
	stderr=subprocess.DEVNULL,
	)

	for chunk in audio_stream:
	if chunk:
	player_process.stdin.write(chunk) # type: ignore
	player_process.stdin.flush() # type: ignore

	if player_process.stdin:
	player_process.stdin.close()
	player_process.wait()

	def send_tts_request(text):
	DEEPGRAM_URL = f"https://api.beta.deepgram.com/v1/speak?model={MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"

	headers = {
	"Authorization": f"Token {DG_API_KEY}",
	"Content-Type": "application/json"
	}

	payload = {
	"text": text,
	"voice": MODEL_NAME
	}

	start_time = time.time() # Record the time before sending the request
	first_byte_time = None # Initialize a variable to store the time when the first byte is received

	# Initialize the player process here, before receiving the stream
	player = "ffplay"
	if not is_installed(player):
	raise ValueError(f"{player} not found, necessary to stream audio.")

	player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
	player_process = subprocess.Popen(
	player_command,
	stdin=subprocess.PIPE,
	stdout=subprocess.DEVNULL,
	stderr=subprocess.DEVNULL,
	)

	start_time = time.time() # Record the time before sending the request
	first_byte_time = None # Initialize a variable to store the time when the first byte is received

	with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
	# dg_performance_total_ms = r.headers.get('x-dg-performance-total-ms', 'Not Available')
	# print(f"Deepgram Performance Total (ms): {dg_performance_total_ms}ms")

	for chunk in r.iter_content(chunk_size=1024):
	if chunk:
	if first_byte_time is None: # Check if this is the first chunk received
	first_byte_time = time.time() # Record the time when the first byte is received
	ttfb = int((first_byte_time - start_time)*1000) # Calculate the time to first byte
	print(f"Time to First Byte (TTFB): {ttfb}ms")
	# Write each chunk to the player's stdin immediately
	player_process.stdin.write(chunk) # type: ignore
	player_process.stdin.flush() # type: ignore

	# Close the player's stdin and wait for the process to finish
	if player_process.stdin:
	player_process.stdin.close()
	player_process.wait()

	# Example usage with saving to file
	text = """
	The returns for performance are superlinear."""
	send_tts_request(text)