Spaces:

Dmtlant
/

Image

Sleeping

File size: 3,674 Bytes

ccabf63
6b694b5
0fef32f
 
5b29361
6b694b5
d6b02ad
1620753
0fef32f
 
163138e
1620753
163138e
ccabf63
dfb92f3
ccabf63
dfb92f3
 
 
 
0fef32f
dfb92f3
0fef32f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfb92f3
0fef32f
163138e
 
0fef32f
163138e

import streamlit as st
import requests
from io import BytesIO
import base64

API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"
headers = {"Authorization": f"Bearer {st.secrets['HF_API_KEY']}"}

# Upper bound (seconds) for one inference request; without a timeout a stalled
# connection would block the Streamlit script run indefinitely.
REQUEST_TIMEOUT_SECONDS = 120


def query(audio_bytes):
    """Send raw audio bytes to the inference endpoint and return its JSON reply.

    Args:
        audio_bytes: Encoded audio content, posted as the request body.

    Returns:
        The decoded JSON body of the response. If the request itself fails
        (connection error, timeout, ...) or the body is not valid JSON, a dict
        with an ``"error"`` key describing the problem is returned instead, so
        callers that check for ``'text'`` fall through to their error path
        rather than crashing.
    """
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            data=audio_bytes,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # The body was not JSON (for example an HTML error page); surface the
        # status and a short excerpt rather than raising.
        return {
            "error": "The inference API returned a non-JSON response.",
            "status_code": response.status_code,
            "body": response.text[:500],
        }

st.title("Speech Recognition with Whisper")

# Session-state slot holding the most recently submitted recording. Streamlit
# re-executes the whole script on every widget interaction, and a button is
# only True on the rerun caused by its own click, so audio decoded under the
# "Submit" button must be stored to still exist when another button is clicked.
RECORDED_AUDIO_KEY = "recorded_audio_bytes"

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # Play back using the MIME type of the upload (mp3/flac are accepted
        # too), falling back to WAV when no type is reported.
        st.audio(uploaded_file, format=uploaded_file.type or 'audio/wav')
        # getvalue() returns the whole buffer independent of the stream's
        # current read position.
        audio_bytes = uploaded_file.getvalue()
else:
    st.write("Click the button below and allow microphone access to start recording")
    
    # JavaScript to handle audio recording
    # NOTE(review): per the Streamlit docs, components.html content is rendered
    # inside an iframe, so the getElementById calls below resolve against that
    # iframe's document. 'submitButton' is not defined in html_code, and the
    # 'audioData' element found is the hidden <input> from html_code, not the
    # Streamlit text input further down. Confirm whether the automatic hand-off
    # of the recording to the Streamlit widgets actually works in deployment.
    js_code = """
    var audioData = null;
    var recorder = null;
    var audioContext = null;
    
    function startRecording() {
        navigator.mediaDevices.getUserMedia({ audio: true })
            .then(stream => {
                audioContext = new AudioContext();
                var input = audioContext.createMediaStreamSource(stream);
                recorder = new Recorder(input);
                recorder.record();
                document.getElementById('startButton').style.display = 'none';
                document.getElementById('stopButton').style.display = 'inline-block';
            });
    }
    
    function stopRecording() {
        recorder.stop();
        document.getElementById('startButton').style.display = 'inline-block';
        document.getElementById('stopButton').style.display = 'none';
        recorder.exportWAV(function(blob) {
            var reader = new FileReader();
            reader.readAsDataURL(blob); 
            reader.onloadend = function() {
                var base64data = reader.result;
                audioData = base64data.split(',')[1];  // Remove the "data:audio/wav;base64," part
                document.getElementById('audioData').value = audioData;
                document.getElementById('submitButton').click();
            }
        });
    }
    """
    
    # HTML for buttons
    html_code = """
    <script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
    <button id="startButton" onclick="startRecording()">Start Recording</button>
    <button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
    <input type="hidden" id="audioData" name="audioData">
    """
    
    st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)
    
    audio_data = st.text_input("Audio data", key="audioData", type="password")
    submit_button = st.empty()
    
    if submit_button.button("Submit", key="submitButton"):
        if audio_data:
            try:
                decoded_audio = base64.b64decode(audio_data)
            except ValueError:
                # binascii.Error (a ValueError subclass) is raised for
                # malformed base64; drop any stale recording and report it.
                if RECORDED_AUDIO_KEY in st.session_state:
                    del st.session_state[RECORDED_AUDIO_KEY]
                st.error("The submitted audio data is not valid base64.")
            else:
                st.session_state[RECORDED_AUDIO_KEY] = decoded_audio
        else:
            if RECORDED_AUDIO_KEY in st.session_state:
                del st.session_state[RECORDED_AUDIO_KEY]
            st.warning("No audio recorded. Please record audio before submitting.")

    # Re-bind the stored recording on every rerun so that it is still
    # available after the rerun triggered by a later button click.
    if RECORDED_AUDIO_KEY in st.session_state:
        audio_bytes = st.session_state[RECORDED_AUDIO_KEY]
        st.audio(audio_bytes, format="audio/wav")

# The Transcribe button is only rendered once audio_bytes has been bound by
# one of the input branches; the short-circuit keeps st.button from being
# called (and therefore drawn) otherwise.
if 'audio_bytes' in locals() and st.button('Transcribe'):
    with st.spinner('Transcribing...'):
        api_reply = query(audio_bytes)

        # A reply without a 'text' entry is treated as a failure and shown
        # verbatim so the user can see what the API returned.
        if 'text' not in api_reply:
            st.error("An error occurred during transcription.")
            st.write("Error details:")
            st.write(api_reply)
        else:
            st.success("Transcription completed!")
            st.write("Transcribed text:")
            st.write(api_reply['text'])

# Page footer.
st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")