import streamlit as st
from tempfile import NamedTemporaryFile
from audiorecorder import audiorecorder
from whispercpp import Whisper
from pydub import AudioSegment
import io
import os
# Initialize whisper.cpp
w = Whisper('tiny')
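# 'tiny' is the smallest and fastest of the Whisper model sizes (tiny, base,
# small, medium, large) and also the least accurate; how the binding resolves
# this name to a ggml model file depends on the whispercpp package installed.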
def inference(audio_segment):
    # Write the AudioSegment to a temporary WAV file that whisper.cpp can read
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
        audio_segment.export(temp.name, format="wav")
    # Leaving the with-block closes the file, so it is fully written before
    # Whisper reads it; remove it afterwards since delete=False was used
    try:
        result = w.transcribe(temp.name)
        text = w.extract_text(result)
    finally:
        os.remove(temp.name)
    return text[0]
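# Standalone usage sketch (not part of the Streamlit flow; "sample.wav" is a
# hypothetical local file):
#     segment = AudioSegment.from_file("sample.wav", format="wav")
#     print(inference(segment))  # prints the transcribed text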
# Streamlit UI setup
with st.sidebar:
    audio = audiorecorder("Click to send voice message", "Recording... Click when you're done", key="recorder")

st.title("Echo Bot with Whisper")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# React to user input
if (prompt := st.chat_input("Your message")) or len(audio):
    # If the input comes from the audio recorder, transcribe the message with whisper.cpp
    if len(audio) > 0:
        # Debugging: check the type of the audio object
        st.write(f"Audio Type: {type(audio)}")
        # Handle the case where the audio arrives as raw bytes
        if isinstance(audio, bytes):
            try:
                # Convert the raw byte data to an AudioSegment instance
                audio_segment = AudioSegment.from_file(io.BytesIO(audio), format="wav")
                prompt = inference(audio_segment)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                prompt = "Sorry, there was an error processing your audio."
        # Handle the case where the audio is already an AudioSegment object
        elif isinstance(audio, AudioSegment):
            # Process it directly since it's already an AudioSegment
            prompt = inference(audio)
        else:
            st.error("The audio data is not in the expected format.")
            prompt = "Sorry, the audio format is not correct."
    # Display the user message in the chat message container
    st.chat_message("user").markdown(prompt)
    # Add the user message to the chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Echo the prompt back as the assistant's response
    response = f"Echo: {prompt}"
    # Display the assistant response in the chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add the assistant response to the chat history
    st.session_state.messages.append({"role": "assistant", "content": response})
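# Running the app locally (a sketch; the package names below are assumptions
# inferred from the imports above, so verify them for your environment):
#   - pip install streamlit streamlit-audiorecorder pydub, plus a whisper.cpp
#     Python binding that exposes Whisper, transcribe, and extract_text as used here
#   - ffmpeg is typically needed on the system PATH for the recorder/pydub conversions
#   - start the app with: streamlit run app.py  (file name assumed)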