|
import numpy as np |
|
import streamlit as st |
|
import speech_recognition as sr |
|
import os |
|
import tempfile |
|
import librosa |
|
import soundfile as sf |
|
|
|
def convert_to_wav(audio_file):
    """Decode *audio_file* and re-encode it as a temporary WAV file.

    Args:
        audio_file: Path (or any other input ``librosa.load`` accepts) of
            the audio to convert.

    Returns:
        Path of a newly created ``.wav`` temporary file. The file is
        created with ``delete=False``, so the caller must remove it.

    Raises:
        Any exception raised by ``librosa.load`` or ``soundfile.write``.
        If the write fails, the temporary file is removed before the
        exception propagates.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    # The rate is bound to `sample_rate` (not `sr`) so the module-level
    # `sr` alias for speech_recognition is not shadowed.
    samples, sample_rate = librosa.load(audio_file, sr=None)

    # Reserve a unique path, then close our handle *before* writing:
    # reopening a NamedTemporaryFile by name while it is still open is
    # not possible on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_wav:
        wav_path = tmp_wav.name

    try:
        sf.write(wav_path, samples, sample_rate, format='wav')
    except Exception:
        # Do not leak the temp file when encoding fails.
        try:
            os.unlink(wav_path)
        except OSError:
            pass  # best effort; the original error is what matters
        raise
    return wav_path
|
|
|
def transcribe_audio(audio_file):
    """Return the text recognised in *audio_file*.

    The whole file is read through ``sr.AudioFile`` and passed to
    ``Recognizer.recognize_google``.

    Args:
        audio_file: Path of an audio file that ``sr.AudioFile`` can open.

    Returns:
        The transcription on success. On ``sr.UnknownValueError`` or
        ``sr.RequestError`` a human-readable message string is returned
        instead of raising.
    """
    engine = sr.Recognizer()

    # Read the entire file into memory as a single audio segment.
    with sr.AudioFile(audio_file) as wav_source:
        captured = engine.record(wav_source)

    try:
        text = engine.recognize_google(captured)
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio"
    except sr.RequestError as err:
        return f"Could not request results from speech recognition service; {err}"
    return text
|
|
|
def main():
    """Render the Streamlit page: upload audio, transcribe it, show the text.

    The uploaded bytes are written to a temporary file, converted to WAV
    with ``convert_to_wav`` and passed to ``transcribe_audio``. Any error
    along the way is reported with ``st.error``; both temporary files are
    removed afterwards whether or not transcription succeeded.
    """
    st.title("Speech-to-Text Converter")
    uploaded_file = st.file_uploader(
        "Choose an audio file",
        type=["wav", "mp3", "m4a", "ogg", "flac"],
    )

    if uploaded_file is None:
        return

    st.audio(uploaded_file)
    if not st.button("Transcribe"):
        return

    # splitext yields "" for a name without an extension, instead of
    # turning the whole file name into a bogus suffix.
    suffix = os.path.splitext(uploaded_file.name)[1]

    # Pre-bind both paths so cleanup never has to inspect locals().
    tmp_file_path = None
    wav_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            tmp_file_path = tmp_file.name
            tmp_file.write(uploaded_file.getvalue())
        # The handle is closed here, so the data is flushed to disk
        # before the converter opens the file by name.
        wav_file_path = convert_to_wav(tmp_file_path)
        transcription = transcribe_audio(wav_file_path)
        st.write("Transcription:")
        st.write(transcription)
    except Exception as e:
        st.error(f"An error occurred: {e}")
    finally:
        # Remove each temp file independently so a failure on one does
        # not leave the other behind.
        for path in (tmp_file_path, wav_file_path):
            if path is None:
                continue
            try:
                os.unlink(path)
            except OSError:
                pass  # best-effort cleanup; must not mask the result shown above
|
|
|
# Start the app only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
|