|
import streamlit as st |
|
import requests |
|
from io import BytesIO |
|
import base64 |
|
|
|
# Hugging Face hosted-inference endpoint for the openai/whisper-large-v3-turbo model.
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

# The access token is read from Streamlit's secrets store under the "hf_token" key
# and sent as a bearer token with every request made through `headers`.
_hf_token = st.secrets['hf_token']
headers = {"Authorization": f"Bearer {_hf_token}"}
|
|
|
def query(audio_bytes, timeout=60):
    """Post raw audio to the inference endpoint and return the decoded JSON reply.

    Args:
        audio_bytes: Audio content, sent unchanged as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, a stalled connection would block the app indefinitely.

    Returns:
        The parsed JSON body of the response. If the request fails or the
        body is not valid JSON, a dict with an "error" key (and, when a
        response was received, its "status_code") is returned instead of
        raising, so the caller can show the problem to the user.
    """
    try:
        response = requests.post(
            API_URL, headers=headers, data=audio_bytes, timeout=timeout
        )
    except requests.RequestException as exc:
        # Connection errors, timeouts, invalid URLs, etc.
        return {"error": f"Request to the inference API failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # The JSON decode errors raised by requests are ValueError subclasses;
        # this happens e.g. when a gateway answers with an HTML error page.
        return {
            "error": "The inference API returned a non-JSON response.",
            "status_code": response.status_code,
        }
|
|
|
# --- Streamlit UI ------------------------------------------------------------
# The user either uploads an audio file or records from the microphone; the
# resulting bytes are sent to the Whisper endpoint via query() on request.

st.title("Speech Recognition with Whisper")

option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

# Audio to transcribe. Stays None until the user has supplied some, so the
# "Transcribe" button below is only offered when there is something to send.
audio_bytes = None

if option == 'Upload File':
    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
    if uploaded_file is not None:
        # getvalue() returns the whole upload regardless of the current read
        # position of the underlying buffer.
        audio_bytes = uploaded_file.getvalue()
        # Play back with the MIME type reported for the upload instead of
        # assuming WAV, since MP3 and FLAC files are accepted too.
        st.audio(audio_bytes, format=uploaded_file.type or 'audio/wav')
else:
    st.write("Click the button below and allow microphone access to start recording")

    # Use Streamlit's built-in recorder widget. It hands the recording back to
    # Python directly and keeps its value across reruns, so the audio is still
    # available on the rerun triggered by clicking "Transcribe".
    record_audio = getattr(st, "audio_input", None) or getattr(
        st, "experimental_audio_input", None
    )
    if record_audio is None:
        st.error(
            "Microphone recording is not available in this Streamlit version. "
            "Please upgrade Streamlit or upload a file instead."
        )
    else:
        recording = record_audio("Record audio")
        if recording is not None:
            audio_bytes = recording.getvalue()
        else:
            st.warning("No audio recorded. Please record audio before submitting.")

# Truthiness check also skips empty (zero-byte) audio. The button is only
# rendered when there is audio, because `and` short-circuits.
if audio_bytes and st.button('Transcribe'):
    with st.spinner('Transcribing...'):
        result = query(audio_bytes)

    # A successful reply is a JSON object carrying the transcription under
    # "text"; anything else is shown to the user as error details.
    if isinstance(result, dict) and 'text' in result:
        st.success("Transcription completed!")
        st.write("Transcribed text:")
        st.write(result['text'])
    else:
        st.error("An error occurred during transcription.")
        st.write("Error details:")
        st.write(result)

st.markdown("---")
st.write("Note: This app uses the Whisper API from Hugging Face.")