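"""Gradio voice-chatbot demo.

Pipeline: microphone audio -> Groq Whisper transcription -> Llama 3 chat
completion -> Deepgram text-to-speech, wired into a gr.Blocks UI.

Requires GROQ_API_KEY and DEEPGRAM_API_KEY in the environment (or a .env
file). Package names assume the usual PyPI distributions: gradio, groq,
deepgram-sdk, soundfile, python-dotenv.
"""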
import io
import os
import gradio as gr
from groq import Groq
import soundfile as sf
from dotenv import load_dotenv
from gradio import ChatMessage
from deepgram import DeepgramClient, SpeakOptions

# Load API keys from a local .env file if one exists; when deployed,
# the variables are expected to already be set in the environment.
load_dotenv()

def get_transcript(audio):
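    """Transcribe a microphone recording with Groq's Whisper endpoint.

    `audio` is the (sample_rate, numpy_data) tuple produced by
    gr.Audio(type="numpy").
    """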
    # Encode the (sample_rate, data) tuple as an in-memory MP3 so it can be
    # uploaded without touching disk.
    audio_buffer = io.BytesIO()
    sf.write(audio_buffer, audio[1], samplerate=audio[0], format="MP3")
    audio_buffer.seek(0)

    # The Groq client reads GROQ_API_KEY from the environment.
    client = Groq()
    transcription = client.audio.transcriptions.create(
        file=("audio.mp3", audio_buffer.read()),
        model="whisper-large-v3-turbo",
        response_format="json",
        temperature=0.0,
    )
    return transcription.text

def generate_response(chat_history: list[ChatMessage]):
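    """Generate the assistant's next reply with a Groq-hosted Llama 3 model."""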
    # Groq client
    client = Groq()
    messages = [
        {
            "role": "system",
            "content": (
                "You are an assistant working in a helpline center. "
                "Answer queries in short and concise sentences. "
                "Keep in mind that the output will be converted to voice, "
                "so use appropriate vocabulary."
            ),
        }
    ]
    messages.extend(
        {"role": message["role"], "content": message["content"]}
        for message in chat_history
    )
    response = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=messages,
    )
    return response.choices[0].message.content

def speech_synthesis(text: str):
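    """Synthesize `text` to speech with Deepgram's Aura voice.

    Returns the path of the saved MP3, or None on failure.
    """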
    api_key = os.getenv("DEEPGRAM_API_KEY")
    payload = {"text": text}
    filename = "audio.mp3"

    try:
        deepgram = DeepgramClient(api_key)
        options = SpeakOptions(
            model="aura-luna-en",
        )
        # Save the synthesized reply to an MP3 file; gr.Audio can play a
        # filepath directly, so return the path rather than raw bytes.
        deepgram.speak.v("1").save(filename, payload, options)
        return filename
    except Exception as e:
        print(f"Speech synthesis failed: {e}")
        return None

def process_audio(audio, chat_history: list[ChatMessage]):
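    """Button callback: transcribe the recording, generate a reply, voice it."""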
    # Nothing recorded yet: leave the chat history unchanged.
    if audio is None:
        return None, chat_history

    # Speech -> text -> LLM reply -> synthesized speech.
    transcript = get_transcript(audio)
    chat_history.append({"role": "user", "content": transcript})
    response = generate_response(chat_history)
    chat_history.append({"role": "assistant", "content": response})
    audio_path = speech_synthesis(response)
    return audio_path, chat_history

with gr.Blocks() as demo:
    gr.Markdown(
        "<h1 style='text-align: center;'>Welcome to the Audio Chatbot Demo</h1>"
    )
    with gr.Row():
        with gr.Column():
            input_audio = gr.Audio(
                label="Input Audio", sources=["microphone"], type="numpy"
            )
            output_audio = gr.Audio(label="Output Audio", interactive=False)
        with gr.Column():
            chatbot = gr.Chatbot(label="Chatbot", type="messages")
            process_button = gr.Button("Process Audio")

    process_button.click(
        fn=process_audio,
        inputs=[input_audio, chatbot],
        outputs=[output_audio, chatbot],
    )

if __name__ == "__main__":
    demo.launch()