# NOTE: removed "Spaces: Sleeping" status-banner text that was scraped from the
# hosting page — it was not part of the program and broke Python parsing.
import asyncio | |
from dotenv import load_dotenv | |
import shutil | |
import subprocess | |
import requests | |
import time | |
import os | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_groq import ChatGroq | |
from langchain_openai import ChatOpenAI | |
from langchain.memory import ConversationBufferMemory | |
from langchain.prompts import ( | |
ChatPromptTemplate, | |
MessagesPlaceholder, | |
SystemMessagePromptTemplate, | |
HumanMessagePromptTemplate, | |
) | |
from langchain.chains import LLMChain | |
from deepgram import ( | |
DeepgramClient, | |
DeepgramClientOptions, | |
LiveTranscriptionEvents, | |
LiveOptions, | |
Microphone, | |
) | |
load_dotenv() | |
class LanguageModelProcessor:
    """Wraps a Groq-hosted LLM behind a LangChain conversation chain with memory."""

    def __init__(self, groq_api_key=None):
        """Build the LLM, conversation memory, and prompt chain.

        groq_api_key: Groq API key; falls back to the GROQ_API_KEY environment
        variable so the class can be constructed with no arguments (the caller
        in this file instantiates it that way).
        """
        self.llm = ChatGroq(
            temperature=0,
            model_name="mixtral-8x7b-32768",
            groq_api_key=groq_api_key or os.getenv("GROQ_API_KEY"),
        )
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

        # Load the system prompt from a file
        with open('system_prompt.txt', 'r') as file:
            system_prompt = file.read().strip()

        self.prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            HumanMessagePromptTemplate.from_template("{text}")
        ])

        self.conversation = LLMChain(
            llm=self.llm,
            prompt=self.prompt,
            memory=self.memory
        )

    def process(self, text):
        """Send *text* to the LLM, print the timed reply, and return it."""
        start_time = time.time()
        # LLMChain configured with a ConversationBufferMemory records both the
        # user turn and the AI turn on its own; the original also called
        # add_user_message/add_ai_message manually, duplicating every message
        # in the chat history. The manual calls were removed.
        response = self.conversation.invoke({"text": text})
        end_time = time.time()

        elapsed_time = int((end_time - start_time) * 1000)
        print(f"LLM ({elapsed_time}ms): {response['text']}")
        return response['text']
class TextToSpeech:
    """Streams Deepgram text-to-speech audio straight into an ffplay subprocess."""

    # Deepgram voice model to use; change as needed.
    MODEL_NAME = "aura-helios-en"

    def __init__(self, deepgram_api_key=None):
        """deepgram_api_key: Deepgram API key; falls back to the
        DEEPGRAM_API_KEY environment variable so the class can be constructed
        with no arguments (the caller in this file instantiates it that way).
        """
        self.DG_API_KEY = deepgram_api_key or os.getenv("DEEPGRAM_API_KEY")

    @staticmethod
    def is_installed(lib_name: str) -> bool:
        """Return True if *lib_name* is an executable found on PATH.

        The original defined this without ``self`` yet called it as
        ``self.is_installed(...)``, which raised a TypeError; it is now a
        proper staticmethod.
        """
        return shutil.which(lib_name) is not None

    def speak(self, text):
        """Synthesize *text* via the Deepgram speak API and play it with ffplay.

        Raises ValueError if ffplay is missing, and requests.HTTPError if the
        API rejects the request.
        """
        if not self.is_installed("ffplay"):
            raise ValueError("ffplay not found, necessary to stream audio.")

        DEEPGRAM_URL = f"https://api.deepgram.com/v1/speak?model={self.MODEL_NAME}&performance=some&encoding=linear16&sample_rate=24000"
        headers = {
            "Authorization": f"Token {self.DG_API_KEY}",
            "Content-Type": "application/json"
        }
        payload = {
            "text": text
        }

        player_command = ["ffplay", "-autoexit", "-", "-nodisp"]
        player_process = subprocess.Popen(
            player_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        start_time = time.time()   # Time before sending the request
        first_byte_time = None     # Set when the first audio chunk arrives

        try:
            with requests.post(DEEPGRAM_URL, stream=True, headers=headers, json=payload) as r:
                # Fail loudly on an API error instead of piping an error body
                # (JSON, not audio) into ffplay.
                r.raise_for_status()
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        if first_byte_time is None:
                            # First chunk: report time-to-first-byte latency.
                            first_byte_time = time.time()
                            ttfb = int((first_byte_time - start_time) * 1000)
                            print(f"TTS Time to First Byte (TTFB): {ttfb}ms\n")
                        player_process.stdin.write(chunk)
                        player_process.stdin.flush()
        finally:
            # Always close the pipe and reap the child, even if the request fails.
            if player_process.stdin:
                player_process.stdin.close()
            player_process.wait()
class TranscriptCollector:
    """Accumulates interim transcript fragments into one full sentence."""

    def __init__(self):
        # Start with an empty buffer.
        self.reset()

    def reset(self):
        """Discard everything collected so far."""
        self.transcript_parts = []

    def add_part(self, part):
        """Append one transcript fragment to the buffer."""
        self.transcript_parts.append(part)

    def get_full_transcript(self):
        """Return all fragments joined by single spaces, in arrival order."""
        parts = self.transcript_parts
        return ' '.join(parts)


# Shared module-level collector used by get_transcript().
transcript_collector = TranscriptCollector()
async def get_transcript(callback):
    """Stream microphone audio to Deepgram live STT until one sentence is
    finalized, then invoke *callback* with that sentence and return.

    callback: callable taking the finished sentence (str); called once per
    finalized sentence before this coroutine unblocks and shuts down.
    """
    transcription_complete = asyncio.Event()  # Event to signal transcription completion

    try:
        # example of setting up a client config. logging values: WARNING, VERBOSE, DEBUG, SPAM
        config = DeepgramClientOptions(options={"keepalive": "true"})
        # NOTE(review): an empty api_key string is passed here — presumably the
        # SDK falls back to the DEEPGRAM_API_KEY environment variable; confirm.
        deepgram: DeepgramClient = DeepgramClient("", config)

        dg_connection = deepgram.listen.asynclive.v("1")
        print ("Listening...")

        async def on_message(self, result, **kwargs):
            # Fired for every transcript event from Deepgram.
            sentence = result.channel.alternatives[0].transcript

            if not result.speech_final:
                # Interim fragment: keep buffering in the shared collector.
                transcript_collector.add_part(sentence)
            else:
                # This is the final part of the current sentence
                transcript_collector.add_part(sentence)
                full_sentence = transcript_collector.get_full_transcript()
                # Check if the full_sentence is not empty before printing
                if len(full_sentence.strip()) > 0:
                    full_sentence = full_sentence.strip()
                    print(f"Human: {full_sentence}")
                    callback(full_sentence)  # Call the callback with the full_sentence
                    transcript_collector.reset()
                    transcription_complete.set()  # Signal to stop transcription and exit

        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)

        # Live-transcription parameters: 16 kHz mono linear16 audio, English,
        # with 300 ms endpointing to decide when a sentence has finished.
        options = LiveOptions(
            model="nova-2",
            punctuate=True,
            language="en-US",
            encoding="linear16",
            channels=1,
            sample_rate=16000,
            endpointing=300,
            smart_format=True,
        )

        await dg_connection.start(options)

        # Open a microphone stream on the default input device
        microphone = Microphone(dg_connection.send)
        microphone.start()

        await transcription_complete.wait()  # Wait for the transcription to complete instead of looping indefinitely

        # Wait for the microphone to close
        microphone.finish()

        # Indicate that we've finished
        await dg_connection.finish()

    except Exception as e:
        print(f"Could not open socket: {e}")
        return
class ConversationManager:
    """Drives the listen -> LLM -> speak loop until the user says goodbye."""

    def __init__(self):
        self.transcription_response = ""
        # Bug fix: the original called LanguageModelProcessor() and
        # TextToSpeech() with no arguments even though both constructors
        # require an API key, which raised TypeError at startup. Pass the
        # keys from the environment explicitly. The TTS client is also
        # constructed once here instead of once per loop iteration.
        self.llm = LanguageModelProcessor(os.getenv("GROQ_API_KEY"))
        self.tts = TextToSpeech(os.getenv("DEEPGRAM_API_KEY"))

    async def main(self):
        """Run the conversation loop until a sentence containing "goodbye"."""

        def handle_full_sentence(full_sentence):
            # get_transcript() callback: stash the finished sentence.
            self.transcription_response = full_sentence

        # Loop indefinitely until "goodbye" is detected
        while True:
            await get_transcript(handle_full_sentence)

            # Check for "goodbye" to exit the loop
            if "goodbye" in self.transcription_response.lower():
                break

            llm_response = self.llm.process(self.transcription_response)
            self.tts.speak(llm_response)

            # Reset transcription_response for the next loop iteration
            self.transcription_response = ""
if __name__ == "__main__":
    # Script entry point: run the conversation loop to completion.
    asyncio.run(ConversationManager().main())