Image / app.py
Dmtlant's picture
Update app.py
d6b02ad verified
raw
history blame
3.67 kB
import streamlit as st
import requests
from io import BytesIO
import base64
# Hosted inference endpoint for the openai/whisper-large-v3-turbo model.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "openai/whisper-large-v3-turbo"
)

# Bearer-token header sent with every request; the token is read from the
# "HF_API_KEY" entry of Streamlit's secrets store.
headers = {"Authorization": "Bearer {}".format(st.secrets["HF_API_KEY"])}
def query(audio_bytes, timeout=120):
    """Send raw audio to the inference endpoint and return the decoded JSON reply.

    Args:
        audio_bytes: Raw bytes of the audio file, posted as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits forever, which would hang the page.

    Returns:
        The parsed JSON payload on a JSON response. If the request fails at
        the network level, or the body is not valid JSON, a dict with an
        ``"error"`` key is returned instead, so callers can keep using a
        single ``'text' in result`` check.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, data=audio_bytes, timeout=timeout
        )
    except requests.RequestException as exc:
        # Connection errors, DNS failures, timeouts, etc.
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # Every JSON decode error raised by requests derives from ValueError.
        # A non-JSON body is typically an HTML error page from a proxy.
        return {
            "error": f"HTTP {response.status_code}: response was not valid JSON",
            "body": response.text[:500],
        }
# ---------------------------------------------------------------------------
# Streamlit page script.
#
# Streamlit re-executes this file from top to bottom on every widget
# interaction, and st.button() is True only during the single rerun caused by
# its own click. Audio must therefore be re-derived from a widget (or read
# back from st.session_state) on every run; otherwise it no longer exists by
# the time "Transcribe" is pressed.
# ---------------------------------------------------------------------------

# session_state slot that keeps audio decoded by the legacy recorder alive
# across reruns.
_RECORDING_KEY = "recorded_audio_bytes"

# JavaScript for the legacy in-browser recorder (drives Recorder.js).
_RECORDER_JS = """
var audioData = null;
var recorder = null;
var audioContext = null;

function startRecording() {
    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            audioContext = new AudioContext();
            var input = audioContext.createMediaStreamSource(stream);
            recorder = new Recorder(input);
            recorder.record();
            document.getElementById('startButton').style.display = 'none';
            document.getElementById('stopButton').style.display = 'inline-block';
        });
}

function stopRecording() {
    recorder.stop();
    document.getElementById('startButton').style.display = 'inline-block';
    document.getElementById('stopButton').style.display = 'none';
    recorder.exportWAV(function(blob) {
        var reader = new FileReader();
        reader.readAsDataURL(blob);
        reader.onloadend = function() {
            var base64data = reader.result;
            audioData = base64data.split(',')[1]; // Remove the "data:audio/wav;base64," part
            document.getElementById('audioData').value = audioData;
            document.getElementById('submitButton').click();
        }
    });
}
"""

# Markup for the legacy recorder: Recorder.js plus start/stop buttons.
# NOTE(review): RawGit announced its shutdown in 2018 -- confirm this CDN URL
# still resolves, or migrate it to another CDN.
_RECORDER_HTML = """
<script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
<button id="startButton" onclick="startRecording()">Start Recording</button>
<button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
<input type="hidden" id="audioData" name="audioData">
"""


def _read_uploaded_audio():
    """Render the file uploader and return the uploaded bytes, or None."""
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is None:
        return None
    # getvalue() returns the whole buffer regardless of the current read
    # position, unlike read().
    data = uploaded_file.getvalue()
    # Use the MIME type reported for the upload so mp3/flac files are not
    # labelled as WAV; fall back to the previous default if it is missing.
    st.audio(data, format=uploaded_file.type or 'audio/wav')
    return data


def _record_audio_legacy():
    """Render the Recorder.js widget and return decoded audio bytes, or None.

    NOTE(review): st.components.v1.html renders inside its own iframe, so the
    script's getElementById('submitButton') lookup presumably cannot reach the
    Streamlit "Submit" button below. The base64 text then has to be pasted in
    manually. Kept only as a fallback for Streamlit versions without
    st.audio_input.
    """
    st.write("Click the button below and allow microphone access to start recording")
    st.components.v1.html(
        _RECORDER_HTML + f'<script>{_RECORDER_JS}</script>', height=100
    )
    audio_data = st.text_input("Audio data", key="audioData", type="password")
    submit_button = st.empty()
    if submit_button.button("Submit", key="submitButton"):
        if audio_data:
            try:
                # Persist the decoded audio: this branch only runs on the
                # rerun triggered by "Submit", not on later reruns.
                st.session_state[_RECORDING_KEY] = base64.b64decode(audio_data)
            except ValueError:
                # binascii.Error (malformed base64) is a ValueError subclass.
                st.session_state.pop(_RECORDING_KEY, None)
                st.error("The recorded audio data is not valid base64.")
        else:
            st.warning("No audio recorded. Please record audio before submitting.")

    recorded = st.session_state.get(_RECORDING_KEY)
    if recorded is not None:
        st.audio(recorded, format="audio/wav")
    return recorded


def _record_audio():
    """Render a microphone recorder and return the recorded bytes, or None."""
    if hasattr(st, "audio_input"):
        # Native recorder: the widget keeps its value across reruns, so the
        # recording is still available when "Transcribe" is clicked.
        recording = st.audio_input("Record from Microphone")
        return None if recording is None else recording.getvalue()
    return _record_audio_legacy()


st.title("Speech Recognition with Whisper")
option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Always bind audio_bytes (None when nothing is available) instead of probing
# locals() for the name.
if option == 'Upload File':
    audio_bytes = _read_uploaded_audio()
else:
    audio_bytes = _record_audio()

if audio_bytes is not None and st.button('Transcribe'):
    with st.spinner('Transcribing...'):
        result = query(audio_bytes)
    # The API may answer with a non-dict payload on errors; only a dict that
    # carries "text" counts as a successful transcription.
    if isinstance(result, dict) and 'text' in result:
        st.success("Transcription completed!")
        st.write("Transcribed text:")
        st.write(result['text'])
    else:
        st.error("An error occurred during transcription.")
        st.write("Error details:")
        st.write(result)

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")