Spaces:

spark-nlp
/

Wav2Vec2ForCTC

Sleeping

App Files Files Community

abdullahmubeen10 committed on Aug 8, 2024

Commit

9883a18

verified ·

1 Parent(s): 6b52778

Update Demo.py

Browse files

Files changed (1) hide show

Demo.py +147 -146

Demo.py CHANGED Viewed

@@ -1,146 +1,147 @@
-import streamlit as st
-import sparknlp
-import os
-import pandas as pd
-from sparknlp.base import *
-from sparknlp.annotator import *
-from pyspark.ml import Pipeline
-from sparknlp.pretrained import PretrainedPipeline
-# Page configuration: wide layout; sidebar state left to Streamlit's "auto" setting
-st.set_page_config(
-    layout="wide",
-    initial_sidebar_state="auto"
-)
-# Custom CSS for the .main-title and .section classes used by the HTML snippets rendered further down
-st.markdown("""
-    <style>
-        .main-title {
-            font-size: 36px;
-            color: #4A90E2;
-            font-weight: bold;
-            text-align: center;
-        }
-        .section {
-            background-color: #f9f9f9;
-            padding: 10px;
-            border-radius: 10px;
-            margin-top: 10px;
-        }
-        .section p, .section ul {
-            color: #666666;
-        }
-    </style>
-""", unsafe_allow_html=True)  # unsafe_allow_html lets the <style> block through as raw HTML
-@st.cache_resource  # cache the returned object so sparknlp.start() is not called again on every script rerun
-def init_spark():
-    """Initialize Spark NLP."""
-    return sparknlp.start()  # the caller stores this as the module-level `spark` that fit_data() reads
-@st.cache_resource  # cached per `model` argument, so each model's pipeline is built once
-def create_pipeline(model):  # model: name of a pretrained Wav2Vec2ForCTC model (an entry of model_list)
-    """Create a Spark NLP pipeline for audio processing."""
-    audio_assembler = AudioAssembler() \
-        .setInputCol("audio_content") \
-        .setOutputCol("audio_assembler")  # this column name is the input of the next stage
-    speech_to_text = Wav2Vec2ForCTC \
-        .pretrained(model)\
-        .setInputCols("audio_assembler") \
-        .setOutputCol("text")  # fit_data() selects "text.result" from this column
-    pipeline = Pipeline(stages=[  # two stages: assemble the raw audio, then transcribe it
-        audio_assembler,
-        speech_to_text
-    ])
-    return pipeline  # returned unfitted; fit_data() calls fit() on it
-def fit_data(pipeline, fed_data):  # fed_data: path of the audio file; returns a Spark DataFrame
-    """Fit the data into the pipeline and return the transcription."""
-    data, sampling_rate = librosa.load(fed_data, sr=16000)  # 16 kHz load; NOTE(review): no `import librosa` among this removed side's imports
-    data = [float(x) for x in data]  # plain Python floats for the DataFrame built below
-    schema = StructType([  # NOTE(review): these pyspark type names are not imported by name here — presumably via the wildcard imports; confirm
-        StructField("audio_content", ArrayType(FloatType())),
-        StructField("sampling_rate", LongType())
-    ])
-    df = pd.DataFrame({  # single-row frame: one recording per call
-        "audio_content": [data],
-        "sampling_rate": [sampling_rate]
-    })
-    spark_df = spark.createDataFrame(df, schema)  # `spark` is the module-level name assigned from init_spark()
-    pipeline_df = pipeline.fit(spark_df).transform(spark_df)
-    return pipeline_df.select("text.result")  # only the `result` field of the "text" output column
-def save_uploadedfile(uploadedfile, path):  # uploadedfile: object with .name and .getbuffer() or .read(); path: target directory
-    """Save the uploaded file to the specified path."""
-    filepath = os.path.join(path, uploadedfile.name)  # NOTE(review): client-supplied name is used unsanitized — consider os.path.basename
-    with open(filepath, "wb") as f:  # "wb" truncates, so an existing file of the same name is overwritten
-        if hasattr(uploadedfile, 'getbuffer'):
-            f.write(uploadedfile.getbuffer())
-        else:
-            f.write(uploadedfile.read())  # fallback for objects without getbuffer()
-# Sidebar content: pretrained model names offered in the model selector
-model_list = [
-    "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman",
-    "asr_wav2vec2_base_100h_13K_steps",
-    "asr_wav2vec2_base_100h_ngram",
-    "asr_wav2vec2_base_100h_by_facebook",
-    "asr_wav2vec2_base_100h_test",
-    "asr_wav2vec2_base_960h"
-]
-model = st.sidebar.selectbox(
-    "Choose the pretrained model",
-    model_list,
-    help="For more info about the models visit: https://sparknlp.org/models"
-)
-# Main content: page title and a one-paragraph description, styled by the CSS classes defined earlier
-st.markdown('<div class="main-title">Speech Recognition With Wav2Vec2ForCTC</div>', unsafe_allow_html=True)
-st.markdown('<div class="section"><p>This demo transcribes audio files into texts using the <code>Wav2Vec2ForCTC</code> Annotator and advanced speech recognition models.</p></div>', unsafe_allow_html=True)
-# Reference notebook link in sidebar (Colab badge linking to the workshop notebook)
-st.sidebar.markdown('Reference notebook:')
-st.sidebar.markdown("""
-    <a href="https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/open-source-nlp/17.0.Automatic_Speech_Recognition_Wav2Vec2.ipynb">
-        <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
-    </a>
-""", unsafe_allow_html=True)
-# Load examples: every entry of the inputs directory is offered as a selectable sample
-AUDIO_FILE_PATH = "inputs"
-audio_files = sorted(os.listdir(AUDIO_FILE_PATH))
-selected_audio = st.selectbox("Select an audio", audio_files)
-# File extensions accepted by the uploader
-audio_file_types = ["mp3", "flac", "wav", "aac", "ogg", "aiff", "wma", "m4a", "ape", "dsf", "dff", "midi", "mid", "opus", "amr"]
-uploadedfile = st.file_uploader("Try it for yourself!", type=audio_file_types)
-if uploadedfile:  # an upload takes precedence over the sample chosen in the selectbox
-    selected_audio = f"{AUDIO_FILE_PATH}/{uploadedfile.name}"
-    save_uploadedfile(uploadedfile, AUDIO_FILE_PATH)  # written into the same directory that is listed as examples
-elif selected_audio:
-    selected_audio = f"{AUDIO_FILE_PATH}/{selected_audio}"
-# Audio playback and transcription of the chosen file
-st.subheader("Play Audio")
-with open(selected_audio, 'rb') as audio_file:  # NOTE(review): if there is no upload and selected_audio is falsy, neither branch above built a path
-    audio_bytes = audio_file.read()
-st.audio(audio_bytes)
-st.subheader(f"Transcription for {selected_audio}:")
-spark = init_spark()  # must be assigned before fit_data() runs: fit_data reads this global
-pipeline = create_pipeline(model)
-output = fit_data(pipeline, selected_audio)
-st.text(output.first().result[0].strip())  # first row, first entry of `result`, whitespace-trimmed

+import streamlit as st
+import sparknlp
+import os
+import pandas as pd
+import librosa
+from sparknlp.base import *
+from sparknlp.annotator import *
+from pyspark.ml import Pipeline
+from sparknlp.pretrained import PretrainedPipeline
+# Page configuration: wide layout; sidebar state left to Streamlit's "auto" setting
+st.set_page_config(
+    layout="wide",
+    initial_sidebar_state="auto"
+)
+# Custom CSS for the .main-title and .section classes used by the HTML snippets rendered further down
+st.markdown("""
+    <style>
+        .main-title {
+            font-size: 36px;
+            color: #4A90E2;
+            font-weight: bold;
+            text-align: center;
+        }
+        .section {
+            background-color: #f9f9f9;
+            padding: 10px;
+            border-radius: 10px;
+            margin-top: 10px;
+        }
+        .section p, .section ul {
+            color: #666666;
+        }
+    </style>
+""", unsafe_allow_html=True)  # unsafe_allow_html lets the <style> block through as raw HTML
+@st.cache_resource  # cache the returned object so sparknlp.start() is not called again on every script rerun
+def init_spark():
+    """Initialize Spark NLP."""
+    return sparknlp.start()  # the caller stores this as the module-level `spark` that fit_data() reads
+@st.cache_resource  # cached per `model` argument, so each model's pipeline is built once
+def create_pipeline(model):  # model: name of a pretrained Wav2Vec2ForCTC model (an entry of model_list)
+    """Create a Spark NLP pipeline for audio processing."""
+    audio_assembler = AudioAssembler() \
+        .setInputCol("audio_content") \
+        .setOutputCol("audio_assembler")  # this column name is the input of the next stage
+    speech_to_text = Wav2Vec2ForCTC \
+        .pretrained(model)\
+        .setInputCols("audio_assembler") \
+        .setOutputCol("text")  # fit_data() selects "text.result" from this column
+    pipeline = Pipeline(stages=[  # two stages: assemble the raw audio, then transcribe it
+        audio_assembler,
+        speech_to_text
+    ])
+    return pipeline  # returned unfitted; fit_data() calls fit() on it
+def fit_data(pipeline, fed_data):  # fed_data: path of the audio file; returns a Spark DataFrame
+    """Fit the data into the pipeline and return the transcription."""
+    data, sampling_rate = librosa.load(fed_data, sr=16000)  # load the audio at a 16 kHz sampling rate
+    data = [float(x) for x in data]  # plain Python floats for the DataFrame built below
+    schema = StructType([  # NOTE(review): these pyspark type names are not imported by name here — presumably via the wildcard imports; confirm
+        StructField("audio_content", ArrayType(FloatType())),
+        StructField("sampling_rate", LongType())
+    ])
+    df = pd.DataFrame({  # single-row frame: one recording per call
+        "audio_content": [data],
+        "sampling_rate": [sampling_rate]
+    })
+    spark_df = spark.createDataFrame(df, schema)  # `spark` is the module-level name assigned from init_spark()
+    pipeline_df = pipeline.fit(spark_df).transform(spark_df)
+    return pipeline_df.select("text.result")  # only the `result` field of the "text" output column
+def save_uploadedfile(uploadedfile, path):  # uploadedfile: object with .name and .getbuffer() or .read(); path: target directory
+    """Save the uploaded file to the specified path."""
+    filepath = os.path.join(path, uploadedfile.name)  # NOTE(review): client-supplied name is used unsanitized — consider os.path.basename
+    with open(filepath, "wb") as f:  # "wb" truncates, so an existing file of the same name is overwritten
+        if hasattr(uploadedfile, 'getbuffer'):
+            f.write(uploadedfile.getbuffer())
+        else:
+            f.write(uploadedfile.read())  # fallback for objects without getbuffer()
+# Sidebar content: pretrained model names offered in the model selector
+model_list = [
+    "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman",
+    "asr_wav2vec2_base_100h_13K_steps",
+    "asr_wav2vec2_base_100h_ngram",
+    "asr_wav2vec2_base_100h_by_facebook",
+    "asr_wav2vec2_base_100h_test",
+    "asr_wav2vec2_base_960h"
+]
+model = st.sidebar.selectbox(
+    "Choose the pretrained model",
+    model_list,
+    help="For more info about the models visit: https://sparknlp.org/models"
+)
+# Main content: page title and a one-paragraph description, styled by the CSS classes defined earlier
+st.markdown('<div class="main-title">Speech Recognition With Wav2Vec2ForCTC</div>', unsafe_allow_html=True)
+st.markdown('<div class="section"><p>This demo transcribes audio files into texts using the <code>Wav2Vec2ForCTC</code> Annotator and advanced speech recognition models.</p></div>', unsafe_allow_html=True)
+# Reference notebook link in sidebar (Colab badge linking to the workshop notebook)
+st.sidebar.markdown('Reference notebook:')
+st.sidebar.markdown("""
+    <a href="https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/open-source-nlp/17.0.Automatic_Speech_Recognition_Wav2Vec2.ipynb">
+        <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
+    </a>
+""", unsafe_allow_html=True)
+# Load examples: every entry of the inputs directory is offered as a selectable sample
+AUDIO_FILE_PATH = "inputs"
+audio_files = sorted(os.listdir(AUDIO_FILE_PATH))
+selected_audio = st.selectbox("Select an audio", audio_files)
+# File extensions accepted by the uploader
+audio_file_types = ["mp3", "flac", "wav", "aac", "ogg", "aiff", "wma", "m4a", "ape", "dsf", "dff", "midi", "mid", "opus", "amr"]
+uploadedfile = st.file_uploader("Try it for yourself!", type=audio_file_types)
+if uploadedfile:  # an upload takes precedence over the sample chosen in the selectbox
+    selected_audio = f"{AUDIO_FILE_PATH}/{uploadedfile.name}"
+    save_uploadedfile(uploadedfile, AUDIO_FILE_PATH)  # written into the same directory that is listed as examples
+elif selected_audio:
+    selected_audio = f"{AUDIO_FILE_PATH}/{selected_audio}"
+# Audio playback and transcription of the chosen file
+st.subheader("Play Audio")
+with open(selected_audio, 'rb') as audio_file:  # NOTE(review): if there is no upload and selected_audio is falsy, neither branch above built a path
+    audio_bytes = audio_file.read()
+st.audio(audio_bytes)
+st.subheader(f"Transcription for {selected_audio}:")
+spark = init_spark()  # must be assigned before fit_data() runs: fit_data reads this global
+pipeline = create_pipeline(model)
+output = fit_data(pipeline, selected_audio)
+st.text(output.first().result[0].strip())  # first row, first entry of `result`, whitespace-trimmed