File size: 3,674 Bytes
ccabf63
6b694b5
0fef32f
 
5b29361
6b694b5
d6b02ad
1620753
0fef32f
 
163138e
1620753
163138e
ccabf63
dfb92f3
ccabf63
dfb92f3
 
 
 
0fef32f
dfb92f3
0fef32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfb92f3
0fef32f
163138e
 
0fef32f
163138e
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import streamlit as st
import requests
from io import BytesIO
import base64

# Hugging Face inference endpoint for the openai/whisper-large-v3-turbo model.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# Authorization header sent with every request to API_URL; the bearer token is
# read from the Streamlit secret named "HF_API_KEY".
headers = {
    "Authorization": f"Bearer {st.secrets['HF_API_KEY']}",
}

def query(audio_bytes, timeout=120):
    """Post audio to the Whisper inference endpoint and return its JSON reply.

    Args:
        audio_bytes: Audio payload sent unchanged as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            app if the endpoint stalls.

    Returns:
        The decoded JSON body of the response (the caller looks for a
        ``"text"`` key in it). If the request itself fails, or the body is not
        valid JSON, a dict with an ``"error"`` key describing the problem is
        returned instead, so the caller can display it rather than crash.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, data=audio_bytes, timeout=timeout
        )
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, ...
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # requests' JSON decode errors derive from ValueError; this happens
        # when the server answers with a non-JSON body (e.g. an HTML error page).
        return {
            "error": "The inference API returned a non-JSON response.",
            "status_code": response.status_code,
            "body": response.text[:500],
        }

st.title("Speech Recognition with Whisper")

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Audio provided by the user for this script run; stays None until an upload
# or a recording is available. An explicit sentinel is used instead of
# probing locals() for the name.
audio_bytes = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # getvalue() returns the whole payload regardless of the current
        # stream position, unlike read().
        audio_bytes = uploaded_file.getvalue()
        # Preview with the MIME type reported for the upload (mp3/flac files
        # are not WAV); fall back to WAV when no type was reported.
        st.audio(audio_bytes, format=uploaded_file.type or 'audio/wav')
else:
    st.write("Click the button below and allow microphone access to start recording")

    if hasattr(st, "audio_input"):
        # Native microphone widget (available in recent Streamlit releases).
        # Its value is kept across reruns, so it is still present in the run
        # triggered by the 'Transcribe' button.
        recording = st.audio_input("Record audio")
        if recording is not None:
            audio_bytes = recording.getvalue()
    else:
        # Legacy fallback for Streamlit versions without st.audio_input.
        # NOTE(review): components.html renders this markup inside an iframe,
        # and the script below looks up an element with id 'submitButton' that
        # this markup does not define, so the automatic hand-off to the
        # Streamlit widgets further down likely never happens -- confirm in a
        # browser; the base64 text may have to be entered manually.

        # JavaScript to handle audio recording
        js_code = """
    var audioData = null;
    var recorder = null;
    var audioContext = null;
    
    function startRecording() {
        navigator.mediaDevices.getUserMedia({ audio: true })
            .then(stream => {
                audioContext = new AudioContext();
                var input = audioContext.createMediaStreamSource(stream);
                recorder = new Recorder(input);
                recorder.record();
                document.getElementById('startButton').style.display = 'none';
                document.getElementById('stopButton').style.display = 'inline-block';
            });
    }
    
    function stopRecording() {
        recorder.stop();
        document.getElementById('startButton').style.display = 'inline-block';
        document.getElementById('stopButton').style.display = 'none';
        recorder.exportWAV(function(blob) {
            var reader = new FileReader();
            reader.readAsDataURL(blob); 
            reader.onloadend = function() {
                var base64data = reader.result;
                audioData = base64data.split(',')[1];  // Remove the "data:audio/wav;base64," part
                document.getElementById('audioData').value = audioData;
                document.getElementById('submitButton').click();
            }
        });
    }
    """

        # HTML for buttons
        html_code = """
    <script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
    <button id="startButton" onclick="startRecording()">Start Recording</button>
    <button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
    <input type="hidden" id="audioData" name="audioData">
    """

        st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)

        audio_data = st.text_input("Audio data", key="audioData", type="password")
        submit_button = st.empty()

        if submit_button.button("Submit", key="submitButton"):
            if audio_data:
                try:
                    # binascii.Error (malformed base64) derives from ValueError.
                    st.session_state["recorded_audio"] = base64.b64decode(audio_data)
                except ValueError:
                    st.session_state["recorded_audio"] = None
                    st.error("The recorded audio data is not valid base64.")
            else:
                st.session_state["recorded_audio"] = None
                st.warning("No audio recorded. Please record audio before submitting.")

        # A button is True only in the run triggered by its own click, so the
        # decoded audio is kept in session state; otherwise it would be gone
        # in the rerun started by pressing 'Transcribe'.
        audio_bytes = st.session_state.get("recorded_audio")
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")

if audio_bytes:
    if st.button('Transcribe'):
        with st.spinner('Transcribing...'):
            result = query(audio_bytes)

        # Require a mapping: on a plain string, `'text' in result` would be a
        # substring test and result['text'] would then raise.
        if isinstance(result, dict) and 'text' in result:
            st.success("Transcription completed!")
            st.write("Transcribed text:")
            st.write(result['text'])
        else:
            st.error("An error occurred during transcription.")
            st.write("Error details:")
            st.write(result)
elif audio_bytes is not None:
    # An upload or recording exists but contains no data.
    st.warning("The provided audio is empty.")

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")