"""Streamlit app that transcribes an uploaded audio file to text.

The uploaded file is written to a temporary file, converted to WAV, and
passed to ``speech_recognition``'s ``Recognizer.recognize_google``.
"""

import contextlib
import os
import tempfile

import librosa
import numpy as np
import soundfile as sf
import speech_recognition as sr
import streamlit as st


def _remove_if_exists(path):
    """Delete *path*, tolerating the case where it is already gone."""
    with contextlib.suppress(FileNotFoundError):
        os.unlink(path)


def convert_to_wav(audio_file):
    """Convert *audio_file* to a temporary WAV file and return its path.

    Args:
        audio_file: Path of the audio file to load with ``librosa.load``.

    Returns:
        Path of a newly created ``.wav`` temporary file. The caller owns
        the file and is responsible for deleting it.

    Raises:
        Exception: Whatever ``librosa.load`` or ``soundfile.write`` raises.
            If writing fails, the temporary file is removed before the
            error propagates.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    samples, sample_rate = librosa.load(audio_file, sr=None)

    # Only the unique path is needed; close the handle before soundfile
    # writes to that path by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_wav:
        wav_path = tmp_wav.name

    try:
        sf.write(wav_path, samples, sample_rate, format='wav')
    except Exception:
        # Do not leave an orphaned temp file behind when the write fails.
        _remove_if_exists(wav_path)
        raise
    return wav_path


def transcribe_audio(audio_file):
    """Transcribe *audio_file* with ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or a human-readable message when the audio
        could not be understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from speech recognition service; {e}"


def main():
    """Render the upload UI and show the transcription on request."""
    st.title("Speech-to-Text Converter")

    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=["wav", "mp3", "m4a", "ogg", "flac"],
    )
    if uploaded_file is None:
        return

    st.audio(uploaded_file)
    if not st.button("Transcribe"):
        return

    # Keep the upload's extension on the temp file; splitext yields ""
    # rather than a bogus suffix when the name has no extension.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_file_path = tmp_file.name

    # Stays None until conversion succeeds, so cleanup knows whether a
    # WAV file was actually produced.
    wav_file_path = None
    try:
        wav_file_path = convert_to_wav(tmp_file_path)
        transcription = transcribe_audio(wav_file_path)
        st.write("Transcription:")
        st.write(transcription)
    except Exception as e:
        # Top-level UI boundary: report the failure instead of crashing.
        st.error(f"An error occurred: {e}")
    finally:
        _remove_if_exists(tmp_file_path)
        if wav_file_path is not None:
            _remove_if_exists(wav_file_path)


if __name__ == "__main__":
    main()