import streamlit as st import sparknlp import os import pandas as pd from sparknlp.base import * from sparknlp.annotator import * from pyspark.ml import Pipeline from sparknlp.pretrained import PretrainedPipeline # Page configuration st.set_page_config( layout="wide", initial_sidebar_state="auto" ) # Custom CSS for styling st.markdown(""" """, unsafe_allow_html=True) @st.cache_resource def init_spark(): """Initialize Spark NLP.""" return sparknlp.start() @st.cache_resource def create_pipeline(model): """Create a Spark NLP pipeline for audio processing.""" audio_assembler = AudioAssembler() \ .setInputCol("audio_content") \ .setOutputCol("audio_assembler") speech_to_text = Wav2Vec2ForCTC \ .pretrained(model)\ .setInputCols("audio_assembler") \ .setOutputCol("text") pipeline = Pipeline(stages=[ audio_assembler, speech_to_text ]) return pipeline def fit_data(pipeline, fed_data): """Fit the data into the pipeline and return the transcription.""" data, sampling_rate = librosa.load(fed_data, sr=16000) data = [float(x) for x in data] schema = StructType([ StructField("audio_content", ArrayType(FloatType())), StructField("sampling_rate", LongType()) ]) df = pd.DataFrame({ "audio_content": [data], "sampling_rate": [sampling_rate] }) spark_df = spark.createDataFrame(df, schema) pipeline_df = pipeline.fit(spark_df).transform(spark_df) return pipeline_df.select("text.result") def save_uploadedfile(uploadedfile, path): """Save the uploaded file to the specified path.""" filepath = os.path.join(path, uploadedfile.name) with open(filepath, "wb") as f: if hasattr(uploadedfile, 'getbuffer'): f.write(uploadedfile.getbuffer()) else: f.write(uploadedfile.read()) # Sidebar content model_list = [ "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman", "asr_wav2vec2_base_100h_13K_steps", "asr_wav2vec2_base_100h_ngram", "asr_wav2vec2_base_100h_by_facebook", "asr_wav2vec2_base_100h_test", "asr_wav2vec2_base_960h" ] model = st.sidebar.selectbox( "Choose the pretrained model", model_list, help="For more info about the models visit: https://sparknlp.org/models" ) # Main content st.markdown('
This demo transcribes audio files into texts using the Wav2Vec2ForCTC
Annotator and advanced speech recognition models.