import streamlit as st
from tempfile import NamedTemporaryFile
from audiorecorder import audiorecorder
from whispercpp import Whisper
from pydub import AudioSegment
import io
import os
# Initialize whisper.cpp
w = Whisper('tiny')
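# 'tiny' is the smallest and fastest of the Whisper model sizes (tiny, base,
# small, medium, large) and also the least accurate; how the binding resolves
# this name to a ggml model file depends on the whispercpp package installed.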
def inference(audio_segment):
    # Write the AudioSegment to a temporary WAV file that whisper.cpp can read
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
        audio_segment.export(temp.name, format="wav")
    # Leaving the with-block closes the file, so it is fully written before
    # Whisper reads it; remove it afterwards since delete=False was used
    try:
        result = w.transcribe(temp.name)
        text = w.extract_text(result)
    finally:
        os.remove(temp.name)
    return text[0]
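# Standalone usage sketch (not part of the Streamlit flow; "sample.wav" is a
# hypothetical local file):
#     segment = AudioSegment.from_file("sample.wav", format="wav")
#     print(inference(segment))  # prints the transcribed text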
# Streamlit UI setup
with st.sidebar:
    audio = audiorecorder("Click to send voice message", "Recording... Click when you're done", key="recorder")

st.title("Echo Bot with Whisper")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# React to user input
if (prompt := st.chat_input("Your message")) or len(audio):
    # If the input comes from the audio recorder, transcribe the message with whisper.cpp
    if len(audio) > 0:
        # Debugging: check the type of the audio object
        st.write(f"Audio Type: {type(audio)}")
        # Handle the case where the audio arrives as raw bytes
        if isinstance(audio, bytes):
            try:
                # Convert the raw byte data to an AudioSegment instance
                audio_segment = AudioSegment.from_file(io.BytesIO(audio), format="wav")
                prompt = inference(audio_segment)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                prompt = "Sorry, there was an error processing your audio."
        # Handle the case where the audio is already an AudioSegment object
        elif isinstance(audio, AudioSegment):
            # Process it directly since it's already an AudioSegment
            prompt = inference(audio)
        else:
            st.error("The audio data is not in the expected format.")
            prompt = "Sorry, the audio format is not correct."
    # Display the user message in the chat message container
    st.chat_message("user").markdown(prompt)
    # Add the user message to the chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Echo the prompt back as the assistant's response
    response = f"Echo: {prompt}"
    # Display the assistant response in the chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add the assistant response to the chat history
    st.session_state.messages.append({"role": "assistant", "content": response})
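# Running the app locally (a sketch; the package names below are assumptions
# inferred from the imports above, so verify them for your environment):
#   - pip install streamlit streamlit-audiorecorder pydub, plus a whisper.cpp
#     Python binding that exposes Whisper, transcribe, and extract_text as used here
#   - ffmpeg is typically needed on the system PATH for the recorder/pydub conversions
#   - start the app with: streamlit run app.py  (file name assumed)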