Spaces:

Dmtlant
/

Image

Sleeping

App Files Files Community

Image / app.py

Dmtlant

Update app.py

0fef32f verified 8 months ago

raw

history blame

3.67 kB

	import streamlit as st
	import requests
	from io import BytesIO
	import base64

	# Hugging Face hosted-inference endpoint for the openai/whisper-large-v3-turbo model.
	API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3-turbo"

	# Request headers carrying the bearer token read from the `hf_token` entry of
	# Streamlit's secrets store; a missing entry fails here, at start-up.
	headers = dict(Authorization=f"Bearer {st.secrets['hf_token']}")

	def query(audio_bytes, timeout=120):
	    """POST raw audio to the Whisper inference endpoint and return its JSON reply.

	    Args:
	        audio_bytes: Contents of the audio file, sent unchanged as the
	            request body.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout ``requests`` waits indefinitely, which would freeze the
	            app on a stalled connection.

	    Returns:
	        The decoded JSON body of the response. If the request fails
	        (connection error, timeout, ...) or the body is not valid JSON, a
	        dict with an ``"error"`` key is returned instead of raising, so the
	        caller's ``'text' in result`` check routes it to the error display.
	    """
	    try:
	        response = requests.post(
	            API_URL, headers=headers, data=audio_bytes, timeout=timeout
	        )
	    except requests.RequestException as exc:
	        return {"error": f"Request to the transcription service failed: {exc}"}

	    try:
	        return response.json()
	    except ValueError:
	        # The JSON decode errors raised by requests are ValueError subclasses;
	        # this is hit when the body is not JSON (e.g. an HTML error page).
	        return {
	            "error": "The transcription service returned a non-JSON response.",
	            "status_code": response.status_code,
	            "body": response.text[:500],
	        }

	# Streamlit page: pick an audio source (file upload or in-browser recording),
	# preview it, and send it to `query` for transcription on request.
	st.title("Speech Recognition with Whisper")

	option = st.radio("Choose input method:", ('Upload File', 'Record from Microphone'))

	# Session-state slot holding the decoded recording. `st.button` is only true on
	# the single rerun triggered by its click, so without this the recording would
	# be gone again on the rerun caused by clicking "Transcribe".
	_RECORDED_AUDIO_KEY = "recorded_audio_bytes"

	# Audio selected for transcription on this run; stays None until a source
	# provides it (replaces the former `'audio_bytes' in locals()` probe).
	audio_bytes = None

	if option == 'Upload File':
	    uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
	    if uploaded_file is not None:
	        # getvalue() returns the whole buffer regardless of the current read
	        # position, whereas read() returns nothing once the stream is consumed.
	        audio_bytes = uploaded_file.getvalue()
	        # Use the MIME type reported for the upload so mp3/flac files are not
	        # announced to the player as wav; fall back to the previous default.
	        st.audio(audio_bytes, format=uploaded_file.type or 'audio/wav')
	else:
	    st.write("Click the button below and allow microphone access to start recording")

	    # JavaScript to handle audio recording
	    js_code = """
	var audioData = null;
	var recorder = null;
	var audioContext = null;

	function startRecording() {
	navigator.mediaDevices.getUserMedia({ audio: true })
	.then(stream => {
	audioContext = new AudioContext();
	var input = audioContext.createMediaStreamSource(stream);
	recorder = new Recorder(input);
	recorder.record();
	document.getElementById('startButton').style.display = 'none';
	document.getElementById('stopButton').style.display = 'inline-block';
	});
	}

	function stopRecording() {
	recorder.stop();
	document.getElementById('startButton').style.display = 'inline-block';
	document.getElementById('stopButton').style.display = 'none';
	recorder.exportWAV(function(blob) {
	var reader = new FileReader();
	reader.readAsDataURL(blob);
	reader.onloadend = function() {
	var base64data = reader.result;
	audioData = base64data.split(',')[1]; // Remove the "data:audio/wav;base64," part
	document.getElementById('audioData').value = audioData;
	document.getElementById('submitButton').click();
	}
	});
	}
	"""

	    # HTML for buttons
	    # NOTE(review): the recorder script is loaded from cdn.rawgit.com; RawGit
	    # has been retired, so confirm this URL still resolves or host the file
	    # elsewhere.
	    html_code = """
	<script src="https://cdn.rawgit.com/mattdiamond/Recorderjs/08e7abd9/dist/recorder.js"></script>
	<button id="startButton" onclick="startRecording()">Start Recording</button>
	<button id="stopButton" style="display: none;" onclick="stopRecording()">Stop Recording</button>
	<input type="hidden" id="audioData" name="audioData">
	"""

	    # NOTE(review): html_code defines the elements `startButton`, `stopButton`
	    # and `audioData`, but nothing with id `submitButton`, which the script
	    # tries to click. Streamlit documents this component as rendering in its
	    # own iframe, so presumably the script cannot reach the widgets created
	    # below -- confirm how the recording is meant to reach `audio_data`.
	    st.components.v1.html(html_code + f'<script>{js_code}</script>', height=100)

	    audio_data = st.text_input("Audio data", key="audioData", type="password")
	    submit_button = st.empty()

	    if submit_button.button("Submit", key="submitButton"):
	        if audio_data:
	            try:
	                st.session_state[_RECORDED_AUDIO_KEY] = base64.b64decode(audio_data)
	            except ValueError:
	                # binascii.Error (malformed base64) is a ValueError subclass.
	                # Drop any earlier recording so stale audio is not transcribed.
	                if _RECORDED_AUDIO_KEY in st.session_state:
	                    del st.session_state[_RECORDED_AUDIO_KEY]
	                st.error("Recorded audio could not be decoded. Please record again.")
	        else:
	            st.warning("No audio recorded. Please record audio before submitting.")

	    # Restore the last submitted recording on every rerun, not only on the one
	    # triggered by the Submit click.
	    if _RECORDED_AUDIO_KEY in st.session_state:
	        audio_bytes = st.session_state[_RECORDED_AUDIO_KEY]
	        st.audio(audio_bytes, format="audio/wav")

	# Offer transcription only once some audio has been provided.
	if audio_bytes is not None and st.button('Transcribe'):
	    with st.spinner('Transcribing...'):
	        result = query(audio_bytes)

	    # A successful reply is a JSON object with a 'text' field; anything else
	    # (error object, list, null) is shown verbatim as error details.
	    if isinstance(result, dict) and 'text' in result:
	        st.success("Transcription completed!")
	        st.write("Transcribed text:")
	        st.write(result['text'])
	    else:
	        st.error("An error occurred during transcription.")
	        st.write("Error details:")
	        st.write(result)

	st.markdown("---")
	st.write("Note: This app uses the Whisper API from Hugging Face.")