import streamlit as st import sparknlp import os import pandas as pd import librosa from sparknlp.base import * from sparknlp.common import * from sparknlp.annotator import * from pyspark.ml import Pipeline from sparknlp.pretrained import PretrainedPipeline from pyspark.sql.types import * import pyspark.sql.functions as F # Page configuration st.set_page_config( layout="wide", initial_sidebar_state="auto" ) # Custom CSS for styling st.markdown(""" """, unsafe_allow_html=True) @st.cache_resource def init_spark(): """Initialize Spark NLP.""" return sparknlp.start() @st.cache_resource def create_pipeline(model): """Create a Spark NLP pipeline for audio processing.""" audio_assembler = AudioAssembler() \ .setInputCol("audio_content") \ .setOutputCol("audio_assembler") speech_to_text = HubertForCTC \ .pretrained(model)\ .setInputCols("audio_assembler") \ .setOutputCol("text") pipeline = Pipeline(stages=[ audio_assembler, speech_to_text ]) return pipeline def fit_data(pipeline, fed_data): """Fit the data into the pipeline and return the transcription.""" data, sampling_rate = librosa.load(fed_data, sr=16000) data = data.tolist() spark_df = spark.createDataFrame([[data]], ["audio_content"]) model = pipeline.fit(spark_df) lp = LightPipeline(model) lp_result = lp.fullAnnotate(data)[0] return lp_result def save_uploadedfile(uploadedfile, path): """Save the uploaded file to the specified path.""" filepath = os.path.join(path, uploadedfile.name) with open(filepath, "wb") as f: if hasattr(uploadedfile, 'getbuffer'): f.write(uploadedfile.getbuffer()) else: f.write(uploadedfile.read()) # Sidebar content model_list = ["asr_hubert_large_ls960"] model = st.sidebar.selectbox( "Choose the pretrained model", model_list, help="For more info about the models visit: https://sparknlp.org/models" ) # Main content st.markdown('
This demo transcribes audio files into texts using the HubertForCTC
Annotator and advanced speech recognition models.