File size: 2,786 Bytes
afe4a62
 
 
 
4a48709
 
afe4a62
4a48709
afe4a62
 
4a48709
 
3afe755
4a48709
 
3afe755
 
afe4a62
 
 
4a48709
afe4a62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3afe755
adfe413
 
 
 
285b197
7ec85f9
 
 
 
 
 
 
adfe413
 
 
 
 
 
285b197
7ec85f9
 
afe4a62
 
 
 
 
 
 
 
 
 
 
3afe755
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import io
import os
from tempfile import NamedTemporaryFile

import streamlit as st
from audiorecorder import audiorecorder
from pydub import AudioSegment
from whispercpp import Whisper

# Initialize whisper.cpp with the 'tiny' model.
# NOTE(review): Streamlit re-executes this whole script on every user
# interaction, so the model is reloaded on each rerun — consider wrapping
# this in @st.cache_resource; confirm against the deployment's rerun cost.
w = Whisper('tiny')

def inference(audio_segment):
    """Transcribe a pydub AudioSegment with whisper.cpp and return the text.

    Args:
        audio_segment: a pydub ``AudioSegment`` holding the recorded audio.

    Returns:
        The first transcribed text segment (``str``) extracted by whisper.cpp.

    Raises:
        Whatever ``AudioSegment.export`` or ``Whisper.transcribe`` raise on
        invalid audio; callers (the UI) catch ``Exception`` around this.
    """
    # delete=False so the path can be reopened by Whisper after we close it
    # (required on Windows, where an open NamedTemporaryFile cannot be read
    # through a second handle). The with-block closes the file on exit, so
    # the explicit close() the original had is no longer needed.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp:
        audio_segment.export(temp.name, format="wav")
    try:
        result = w.transcribe(temp.name)
        text = w.extract_text(result)
    finally:
        # Bug fix: the original never removed the delete=False temp file,
        # leaking one WAV on disk per transcription.
        os.remove(temp.name)
    return text[0]

# Streamlit UI setup.
# The recorder widget must be created before the input check below, since
# `audio` is read there on the same script run.
with st.sidebar:
    audio = audiorecorder("Click to send voice message", "Recording... Click when you're done", key="recorder")
    st.title("Echo Bot with Whisper")

# Initialize chat history (session_state survives Streamlit reruns,
# so this only runs on the first load of a session).
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input: either typed text (walrus binds `prompt`) or a
# non-empty recording. `st.chat_input` returns None when nothing was typed.
if (prompt := st.chat_input("Your message")) or len(audio):
    # If it's coming from the audio recorder transcribe the message with whisper.cpp.
    # NOTE(review): when both a typed prompt and a recording are present,
    # the transcription below overwrites the typed `prompt` — confirm that
    # audio taking precedence is intended.
    if len(audio) > 0:
        # Debugging: Check the type of the audio object.
        # NOTE(review): leftover debug output visible to end users — remove
        # once the audiorecorder return type is settled.
        st.write(f"Audio Type: {type(audio)}")

        # Handle the case where audio is in a byte format
        if isinstance(audio, bytes):
            try:
                # Convert the raw byte data to an AudioSegment instance
                # (assumes the recorder emitted WAV bytes — TODO confirm).
                audio_segment = AudioSegment.from_file(io.BytesIO(audio), format="wav")
                prompt = inference(audio_segment)
            except Exception as e:
                # Broad catch is deliberate best-effort: surface the error
                # in the UI and fall back to a placeholder message.
                st.error(f"Error processing audio: {e}")
                prompt = "Sorry, there was an error processing your audio."

        # Handle the case where audio is an AudioSegment object
        elif isinstance(audio, AudioSegment):
            # Process it directly since it's already an AudioSegment
            prompt = inference(audio)

        else:
            st.error("The audio data is not in the expected format.")
            prompt = "Sorry, the audio format is not correct."

    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Echo bot: the assistant reply is just the user's text prefixed.
    response = f"Echo: {prompt}"
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})