Upload 15 files
- .streamlit/config.toml +3 -0
- Demo.py +146 -0
- Dockerfile +70 -0
- inputs/audio-1.flac +0 -0
- inputs/audio-10.flac +0 -0
- inputs/audio-2.flac +0 -0
- inputs/audio-3.flac +0 -0
- inputs/audio-4.flac +0 -0
- inputs/audio-5.flac +0 -0
- inputs/audio-6.flac +0 -0
- inputs/audio-7.flac +0 -0
- inputs/audio-8.flac +0 -0
- inputs/audio-9.flac +0 -0
- pages/Workflow & Model Overview.py +198 -0
- requirements.txt +5 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
[theme]
base="light"
primaryColor="#29B4E8"
Demo.py
ADDED
@@ -0,0 +1,146 @@
import streamlit as st
import sparknlp
import os
import librosa
import pandas as pd

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
from pyspark.sql.types import StructType, StructField, ArrayType, FloatType, LongType

# Page configuration
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# Custom CSS for styling
st.markdown("""
<style>
    .main-title {
        font-size: 36px;
        color: #4A90E2;
        font-weight: bold;
        text-align: center;
    }
    .section {
        background-color: #f9f9f9;
        padding: 10px;
        border-radius: 10px;
        margin-top: 10px;
    }
    .section p, .section ul {
        color: #666666;
    }
</style>
""", unsafe_allow_html=True)

@st.cache_resource
def init_spark():
    """Initialize Spark NLP."""
    return sparknlp.start()

@st.cache_resource
def create_pipeline(model):
    """Create a Spark NLP pipeline for audio processing."""
    audio_assembler = AudioAssembler() \
        .setInputCol("audio_content") \
        .setOutputCol("audio_assembler")

    speech_to_text = Wav2Vec2ForCTC \
        .pretrained(model) \
        .setInputCols("audio_assembler") \
        .setOutputCol("text")

    pipeline = Pipeline(stages=[
        audio_assembler,
        speech_to_text
    ])
    return pipeline

def fit_data(pipeline, fed_data):
    """Run the pipeline on the given audio file and return the transcription column."""
    # Wav2Vec2 models expect 16 kHz audio; librosa resamples on load.
    data, sampling_rate = librosa.load(fed_data, sr=16000)
    data = [float(x) for x in data]

    schema = StructType([
        StructField("audio_content", ArrayType(FloatType())),
        StructField("sampling_rate", LongType())
    ])

    df = pd.DataFrame({
        "audio_content": [data],
        "sampling_rate": [sampling_rate]
    })

    spark_df = spark.createDataFrame(df, schema)
    pipeline_df = pipeline.fit(spark_df).transform(spark_df)
    return pipeline_df.select("text.result")

def save_uploadedfile(uploadedfile, path):
    """Save the uploaded file to the specified path."""
    filepath = os.path.join(path, uploadedfile.name)
    with open(filepath, "wb") as f:
        if hasattr(uploadedfile, 'getbuffer'):
            f.write(uploadedfile.getbuffer())
        else:
            f.write(uploadedfile.read())

# Sidebar content
model_list = [
    "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman",
    "asr_wav2vec2_base_100h_13K_steps",
    "asr_wav2vec2_base_100h_ngram",
    "asr_wav2vec2_base_100h_by_facebook",
    "asr_wav2vec2_base_100h_test",
    "asr_wav2vec2_base_960h"
]

model = st.sidebar.selectbox(
    "Choose the pretrained model",
    model_list,
    help="For more info about the models visit: https://sparknlp.org/models"
)

# Main content
st.markdown('<div class="main-title">Speech Recognition With Wav2Vec2ForCTC</div>', unsafe_allow_html=True)
st.markdown('<div class="section"><p>This demo transcribes audio files into text using the <code>Wav2Vec2ForCTC</code> annotator and pretrained speech recognition models.</p></div>', unsafe_allow_html=True)

# Reference notebook link in sidebar
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown("""
<a href="https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/open-source-nlp/17.0.Automatic_Speech_Recognition_Wav2Vec2.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
""", unsafe_allow_html=True)

# Load bundled example clips
AUDIO_FILE_PATH = "inputs"
audio_files = sorted(os.listdir(AUDIO_FILE_PATH))

selected_audio = st.selectbox("Select an audio", audio_files)

# Audio file types accepted by the uploader
audio_file_types = ["mp3", "flac", "wav", "aac", "ogg", "aiff", "wma", "m4a", "ape", "dsf", "dff", "midi", "mid", "opus", "amr"]
uploadedfile = st.file_uploader("Try it for yourself!", type=audio_file_types)

if uploadedfile:
    selected_audio = f"{AUDIO_FILE_PATH}/{uploadedfile.name}"
    save_uploadedfile(uploadedfile, AUDIO_FILE_PATH)
elif selected_audio:
    selected_audio = f"{AUDIO_FILE_PATH}/{selected_audio}"

# Audio playback and transcription
st.subheader("Play Audio")

with open(selected_audio, 'rb') as audio_file:
    audio_bytes = audio_file.read()
    st.audio(audio_bytes)

st.subheader(f"Transcription for {selected_audio}:")

spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_audio)

st.text(output.first().result[0].strip())
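Note: for quick verification outside Streamlit, here is a minimal headless sketch of the same pipeline. It assumes the packages from requirements.txt are installed and that one of the bundled clips (inputs/audio-1.flac) is available; the model name is taken from the demo's own model_list.

import librosa
import pandas as pd
import sparknlp
from sparknlp.base import AudioAssembler
from sparknlp.annotator import Wav2Vec2ForCTC
from pyspark.ml import Pipeline
from pyspark.sql.types import StructType, StructField, ArrayType, FloatType, LongType

spark = sparknlp.start()

# Wav2Vec2 models expect 16 kHz audio; librosa resamples on load.
data, sampling_rate = librosa.load("inputs/audio-1.flac", sr=16000)

schema = StructType([
    StructField("audio_content", ArrayType(FloatType())),
    StructField("sampling_rate", LongType())
])
spark_df = spark.createDataFrame(
    pd.DataFrame({"audio_content": [[float(x) for x in data]],
                  "sampling_rate": [sampling_rate]}),
    schema
)

pipeline = Pipeline(stages=[
    AudioAssembler().setInputCol("audio_content").setOutputCol("audio_assembler"),
    Wav2Vec2ForCTC.pretrained("asr_wav2vec2_base_960h")
        .setInputCols("audio_assembler")
        .setOutputCol("text")
])
print(pipeline.fit(spark_df).transform(spark_df).select("text.result").first().result[0].strip())

The @st.cache_resource decorators above are what keep this affordable inside the app: starting a Spark session and downloading a pretrained model are both expensive, so Demo.py caches them across Streamlit reruns instead of rebuilding them on every widget interaction.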
Dockerfile
ADDED
@@ -0,0 +1,70 @@
# Download base image ubuntu 18.04
FROM ubuntu:18.04

# Set environment variables
ENV NB_USER jovyan
ENV NB_UID 1000
ENV HOME /home/${NB_USER}

# Install required packages
RUN apt-get update && apt-get install -y \
    tar \
    wget \
    bash \
    rsync \
    gcc \
    libfreetype6-dev \
    libhdf5-serial-dev \
    libpng-dev \
    libzmq3-dev \
    python3 \
    python3-dev \
    python3-pip \
    unzip \
    pkg-config \
    software-properties-common \
    graphviz \
    openjdk-8-jdk \
    ant \
    ca-certificates-java \
    && apt-get clean \
    && update-ca-certificates -f

# Install Python 3.8 and pip
RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.8 python3-pip \
    && apt-get clean

# Set up JAVA_HOME
ENV JAVA_HOME /usr/lib/jvm/java-8-openjdk-amd64/
RUN mkdir -p ${HOME} \
    && echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/" >> ${HOME}/.bashrc \
    && chown -R ${NB_UID}:${NB_UID} ${HOME}

# Create a new user named "jovyan" with user ID 1000
RUN useradd -m -u ${NB_UID} ${NB_USER}

# Switch to the "jovyan" user
USER ${NB_USER}

# Set home and path variables for the user
ENV HOME=/home/${NB_USER} \
    PATH=/home/${NB_USER}/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR ${HOME}

# Upgrade pip and install Python dependencies
RUN python3.8 -m pip install --upgrade pip
COPY requirements.txt /tmp/requirements.txt
RUN python3.8 -m pip install -r /tmp/requirements.txt

# Copy the application code into the container at /home/jovyan
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}

# Expose port for Streamlit
EXPOSE 7860

# Define the entry point for the container
ENTRYPOINT ["streamlit", "run", "Demo.py", "--server.port=7860", "--server.address=0.0.0.0"]
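Note: for local testing, the image can be built and run like any Streamlit container (the tag below is illustrative); port 7860 matches the EXPOSE line above and the Hugging Face Spaces convention.

docker build -t wav2vec2-asr-demo .
docker run -p 7860:7860 wav2vec2-asr-demo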
inputs/audio-1.flac
ADDED
Binary file (112 kB)

inputs/audio-10.flac
ADDED
Binary file (76 kB)

inputs/audio-2.flac
ADDED
Binary file (49 kB)

inputs/audio-3.flac
ADDED
Binary file (74 kB)

inputs/audio-4.flac
ADDED
Binary file (113 kB)

inputs/audio-5.flac
ADDED
Binary file (138 kB)

inputs/audio-6.flac
ADDED
Binary file (36.5 kB)

inputs/audio-7.flac
ADDED
Binary file (177 kB)

inputs/audio-8.flac
ADDED
Binary file (94.3 kB)

inputs/audio-9.flac
ADDED
Binary file (129 kB)
pages/Workflow & Model Overview.py
ADDED
@@ -0,0 +1,198 @@
import streamlit as st

# Custom CSS for better styling
st.markdown("""
<style>
    .main-title {
        font-size: 36px;
        color: #4A90E2;
        font-weight: bold;
        text-align: center;
    }
    .sub-title {
        font-size: 24px;
        color: #4A90E2;
        margin-top: 20px;
    }
    .section {
        background-color: #f9f9f9;
        padding: 15px;
        border-radius: 10px;
        margin-top: 20px;
    }
    .section p, .section ul {
        color: #666666;
    }
    .link {
        color: #4A90E2;
        text-decoration: none;
    }
    .benchmark-table {
        width: 100%;
        border-collapse: collapse;
        margin-top: 20px;
    }
    .benchmark-table th, .benchmark-table td {
        border: 1px solid #ddd;
        padding: 8px;
        text-align: left;
    }
    .benchmark-table th {
        background-color: #4A90E2;
        color: white;
    }
    .benchmark-table td {
        background-color: #f2f2f2;
    }
</style>
""", unsafe_allow_html=True)

# Main Title
st.markdown('<div class="main-title">Wav2Vec2 for Speech Recognition</div>', unsafe_allow_html=True)

# Description
st.markdown("""
<div class="section">
    <p><strong>Wav2Vec2</strong> is a groundbreaking model in Automatic Speech Recognition (ASR), developed to learn speech representations from raw audio. This model achieves exceptional accuracy with minimal labeled data, making it ideal for low-resource settings. Adapted for Spark NLP, Wav2Vec2 enables scalable, production-ready ASR applications.</p>
</div>
""", unsafe_allow_html=True)

# Why, Where, and When to Use Wav2Vec2
st.markdown('<div class="sub-title">Why, Where, and When to Use Wav2Vec2</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <p>Use <strong>Wav2Vec2</strong> when you need a robust ASR solution that excels in scenarios with limited labeled data. It’s perfect for various speech-to-text applications where scalability and accuracy are critical. Some ideal use cases include:</p>
    <ul>
        <li><strong>Transcription Services:</strong> Efficiently convert large volumes of speech into text, vital for media, legal, and healthcare industries.</li>
        <li><strong>Voice-Activated Assistants:</strong> Enhance the accuracy of voice commands in smart devices and personal assistants.</li>
        <li><strong>Meeting Summarization:</strong> Automatically transcribe and summarize meetings, aiding in easy content review and catch-up for absentees.</li>
        <li><strong>Language Learning Tools:</strong> Assist learners in improving pronunciation by providing real-time speech-to-text feedback.</li>
        <li><strong>Accessibility Enhancements:</strong> Generate real-time captions for videos and live events, making content accessible to the hearing impaired.</li>
        <li><strong>Call Center Analytics:</strong> Analyze customer interactions for insights and quality monitoring.</li>
    </ul>
</div>
""", unsafe_allow_html=True)

# How to Use the Model
st.markdown('<div class="sub-title">How to Use the Model</div>', unsafe_allow_html=True)
st.code('''
audio_assembler = AudioAssembler() \\
    .setInputCol("audio_content") \\
    .setOutputCol("audio_assembler")

speech_to_text = Wav2Vec2ForCTC \\
    .pretrained("asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman", "en") \\
    .setInputCols("audio_assembler") \\
    .setOutputCol("text")

pipeline = Pipeline(stages=[
    audio_assembler,
    speech_to_text,
])

pipelineModel = pipeline.fit(audioDf)

pipelineDF = pipelineModel.transform(audioDf)
''', language='python')

# Best Practices & Tips
st.markdown('<div class="sub-title">Best Practices & Tips</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <ul>
        <li><strong>Preprocessing:</strong> Ensure your audio data is clear and well-prepared by removing background noise and normalizing audio levels for the best transcription results.</li>
        <li><strong>Fine-tuning:</strong> For specific use cases or languages, consider fine-tuning the model on your own dataset to improve accuracy.</li>
        <li><strong>Batch Processing:</strong> Leverage Spark NLP's distributed processing capabilities to handle large-scale audio datasets efficiently.</li>
        <li><strong>Model Evaluation:</strong> Regularly evaluate the model's performance on your specific use case using metrics like Word Error Rate (WER) to ensure it meets your accuracy requirements.</li>
        <li><strong>Resource Management:</strong> When deploying in production, monitor resource usage, especially for large models, to optimize performance and cost.</li>
    </ul>
</div>
""", unsafe_allow_html=True)

# Model Information
st.markdown('<div class="sub-title">Model Information</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <table class="benchmark-table">
        <tr>
            <th>Attribute</th>
            <th>Description</th>
        </tr>
        <tr>
            <td><strong>Model Name</strong></td>
            <td>asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman</td>
        </tr>
        <tr>
            <td><strong>Compatibility</strong></td>
            <td>Spark NLP 4.2.0+</td>
        </tr>
        <tr>
            <td><strong>License</strong></td>
            <td>Open Source</td>
        </tr>
        <tr>
            <td><strong>Edition</strong></td>
            <td>Official</td>
        </tr>
        <tr>
            <td><strong>Input Labels</strong></td>
            <td>[audio_assembler]</td>
        </tr>
        <tr>
            <td><strong>Output Labels</strong></td>
            <td>[text]</td>
        </tr>
        <tr>
            <td><strong>Language</strong></td>
            <td>en</td>
        </tr>
        <tr>
            <td><strong>Size</strong></td>
            <td>1.2 GB</td>
        </tr>
    </table>
</div>
""", unsafe_allow_html=True)

# Data Source Section
st.markdown('<div class="sub-title">Data Source</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <p>The Wav2Vec2 model is available on <a class="link" href="https://huggingface.co/jonatasgrosman/asr_wav2vec2_large_xlsr_53_english" target="_blank">Hugging Face</a>. This model, trained by <em>jonatasgrosman</em>, has been adapted for use with Spark NLP, ensuring it is optimized for large-scale applications.</p>
</div>
""", unsafe_allow_html=True)

# Conclusion
st.markdown('<div class="sub-title">Conclusion</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <p><strong>Wav2Vec2</strong> is a versatile and powerful ASR model that excels in scenarios with limited labeled data, making it a game-changer in the field of speech recognition. Its seamless integration with Spark NLP allows for scalable, efficient, and accurate deployment in various real-world applications, from transcription services to voice-activated systems.</p>
</div>
""", unsafe_allow_html=True)

# References
st.markdown('<div class="sub-title">References</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <ul>
        <li><a class="link" href="https://sparknlp.org/2022/09/24/asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman_en.html" target="_blank">Wav2Vec2 Model on Spark NLP</a></li>
        <li><a class="link" href="https://huggingface.co/jonatasgrosman/asr_wav2vec2_large_xlsr_53_english" target="_blank">Wav2Vec2 Model on Hugging Face</a></li>
        <li><a class="link" href="https://arxiv.org/abs/2006.11477" target="_blank">wav2vec 2.0 Paper</a></li>
        <li><a class="link" href="https://github.com/pytorch/fairseq/tree/master/examples/wav2vec" target="_blank">Wav2Vec2 GitHub Repository</a></li>
    </ul>
</div>
""", unsafe_allow_html=True)

# Community & Support
st.markdown('<div class="sub-title">Community & Support</div>', unsafe_allow_html=True)
st.markdown("""
<div class="section">
    <ul>
        <li><a class="link" href="https://sparknlp.org/" target="_blank">Official Website</a>: Comprehensive documentation and examples.</li>
        <li><a class="link" href="https://join.slack.com/t/spark-nlp/shared_invite/zt-198dipu77-L3UWNe_AJ8xqDk0ivmih5Q" target="_blank">Slack</a>: Join the community for live discussions and support.</li>
        <li><a class="link" href="https://github.com/JohnSnowLabs/spark-nlp" target="_blank">GitHub</a>: Report issues, request features, and contribute to the project.</li>
        <li><a class="link" href="https://medium.com/spark-nlp" target="_blank">Medium</a>: Read articles and tutorials about Spark NLP.</li>
        <li><a class="link" href="https://www.youtube.com/channel/UCmFOjlpYEhxf_wJUDuz6xxQ/videos" target="_blank">YouTube</a>: Watch video tutorials and demonstrations.</li>
    </ul>
</div>
""", unsafe_allow_html=True)
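Note: as a follow-up to the page's "How to Use the Model" snippet, once pipelineDF is computed the transcription is an ordinary Spark column (named by the setOutputCol("text") call above), so it can be inspected like any DataFrame; a one-line sketch:

pipelineDF.select("text.result").show(truncate=False)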
requirements.txt
ADDED
@@ -0,0 +1,5 @@
streamlit
spark-nlp
pyspark
librosa
pandas
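Note: a quick post-install sanity check (a minimal sketch; assumes the packages above installed cleanly). The overview page lists Spark NLP 4.2.0+ as the minimum version for Wav2Vec2ForCTC.

import librosa, pandas, pyspark, streamlit
import sparknlp

print(sparknlp.version())  # expect 4.2.0 or later for Wav2Vec2ForCTC support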