File size: 2,786 Bytes
afe4a62
 
 
 
4a48709
 
afe4a62
4a48709
afe4a62
 
4a48709
 
3afe755
4a48709
 
3afe755
 
afe4a62
 
 
4a48709
afe4a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3afe755
adfe413
 
 
 
285b197
7ec85f9
 
 
 
 
 
 
adfe413
 
 
 
 
 
285b197
7ec85f9
 
afe4a62
 
 
 
 
 
 
 
 
 
 
3afe755
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import io
import os
from tempfile import NamedTemporaryFile

import streamlit as st
from audiorecorder import audiorecorder
from pydub import AudioSegment
from whispercpp import Whisper

# Initialize whisper.cpp with the 'tiny' model.
# NOTE(review): Streamlit re-executes this whole script on every user
# interaction, so the model is reloaded on each rerun — consider wrapping
# this in @st.cache_resource; confirm against the deployment's rerun cost.
w = Whisper('tiny')

def inference(audio_segment):
    """Transcribe a pydub AudioSegment with whisper.cpp and return the text.

    Args:
        audio_segment: a pydub ``AudioSegment`` holding the recorded audio.

    Returns:
        The first transcribed text segment (``str``) extracted by whisper.cpp.

    Raises:
        Whatever ``AudioSegment.export`` or ``Whisper.transcribe`` raise on
        invalid audio; callers (the UI) catch ``Exception`` around this.
    """
    # delete=False so the path can be reopened by Whisper after we close it
    # (required on Windows, where an open NamedTemporaryFile cannot be read
    # through a second handle). The with-block closes the file on exit, so
    # the explicit close() the original had is no longer needed.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
        audio_segment.export(temp.name, format="wav")
    try:
        result = w.transcribe(temp.name)
        text = w.extract_text(result)
    finally:
        # Bug fix: the original never removed the delete=False temp file,
        # leaking one WAV on disk per transcription.
        os.remove(temp.name)
    return text[0]

# Streamlit UI setup.
# The recorder widget must be created before the input check below, since
# `audio` is read there on the same script run.
with st.sidebar:
    audio = audiorecorder("Click to send voice message", "Recording... Click when you're done", key="recorder")
    st.title("Echo Bot with Whisper")

# Initialize chat history (session_state survives Streamlit reruns,
# so this only runs on the first load of a session).
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input: either typed text (walrus binds `prompt`) or a
# non-empty recording. `st.chat_input` returns None when nothing was typed.
if (prompt := st.chat_input("Your message")) or len(audio):
    # If it's coming from the audio recorder transcribe the message with whisper.cpp.
    # NOTE(review): when both a typed prompt and a recording are present,
    # the transcription below overwrites the typed `prompt` — confirm that
    # audio taking precedence is intended.
    if len(audio) > 0:
        # Debugging: Check the type of the audio object.
        # NOTE(review): leftover debug output visible to end users — remove
        # once the audiorecorder return type is settled.
        st.write(f"Audio Type: {type(audio)}")

        # Handle the case where audio is in a byte format
        if isinstance(audio, bytes):
            try:
                # Convert the raw byte data to an AudioSegment instance
                # (assumes the recorder emitted WAV bytes — TODO confirm).
                audio_segment = AudioSegment.from_file(io.BytesIO(audio), format="wav")
                prompt = inference(audio_segment)
            except Exception as e:
                # Broad catch is deliberate best-effort: surface the error
                # in the UI and fall back to a placeholder message.
                st.error(f"Error processing audio: {e}")
                prompt = "Sorry, there was an error processing your audio."

        # Handle the case where audio is an AudioSegment object
        elif isinstance(audio, AudioSegment):
            # Process it directly since it's already an AudioSegment
            prompt = inference(audio)

        else:
            st.error("The audio data is not in the expected format.")
            prompt = "Sorry, the audio format is not correct."

    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Echo bot: the assistant reply is just the user's text prefixed.
    response = f"Echo: {prompt}"
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})