# Streamlit Echo Bot: voice messages are transcribed with whisper.cpp,
# text messages are echoed back.
import io
import os
from tempfile import NamedTemporaryFile

import streamlit as st
from audiorecorder import audiorecorder
from pydub import AudioSegment
from whispercpp import Whisper
# Initialize whisper.cpp
# Loads the "tiny" Whisper model at module level. NOTE(review): Streamlit
# re-executes the script on every interaction, so this load is paid per
# rerun — consider @st.cache_resource; confirm against app latency needs.
w = Whisper('tiny')
def inference(audio_segment):
    """Transcribe a pydub AudioSegment with whisper.cpp and return the text.

    The segment is exported to a temporary WAV file because the Whisper
    binding takes a file path, not in-memory audio.

    Args:
        audio_segment: pydub.AudioSegment holding the recorded audio.

    Returns:
        str: the first (full) transcription segment extracted by Whisper.
    """
    # delete=False so the file survives the context manager; exiting the
    # `with` closes the handle, guaranteeing the WAV data is flushed to
    # disk before Whisper opens the path.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
        audio_segment.export(temp.name, format="wav")
    try:
        result = w.transcribe(temp.name)
        text = w.extract_text(result)
        return text[0]
    finally:
        # Fix: the original never removed the temp file, leaking one WAV
        # per transcription. Clean it up even if transcription raises.
        os.remove(temp.name)
# Streamlit UI setup
with st.sidebar:
    # Recorder widget; returns the recorded audio (empty until the user
    # finishes a recording). Depending on the audiorecorder version this
    # is a pydub AudioSegment or raw WAV bytes — both are handled below.
    audio = audiorecorder("Click to send voice message", "Recording... Click when you're done", key="recorder")
st.title("Echo Bot with Whisper")
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# React to user input: either typed text or a finished voice recording.
if (prompt := st.chat_input("Your message")) or len(audio):
    # A non-empty recording takes precedence over typed text and is
    # transcribed with whisper.cpp.
    # Fix: removed leftover debug output (st.write of the audio type)
    # that cluttered the UI on every voice message.
    if len(audio) > 0:
        # Handle the case where audio is in a byte format
        if isinstance(audio, bytes):
            try:
                # Convert the raw byte data to an AudioSegment instance
                audio_segment = AudioSegment.from_file(io.BytesIO(audio), format="wav")
                prompt = inference(audio_segment)
            except Exception as e:
                st.error(f"Error processing audio: {e}")
                prompt = "Sorry, there was an error processing your audio."
        # Handle the case where audio is an AudioSegment object
        elif isinstance(audio, AudioSegment):
            # Process it directly since it's already an AudioSegment
            prompt = inference(audio)
        else:
            st.error("The audio data is not in the expected format.")
            prompt = "Sorry, the audio format is not correct."
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    response = f"Echo: {prompt}"
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})