abdullahmubeen10 commited on
Commit
9883a18
·
verified ·
1 Parent(s): 6b52778

Update Demo.py

Browse files
Files changed (1) hide show
  1. Demo.py +147 -146
Demo.py CHANGED
@@ -1,146 +1,147 @@
1
- import streamlit as st
2
- import sparknlp
3
- import os
4
- import pandas as pd
5
-
6
- from sparknlp.base import *
7
- from sparknlp.annotator import *
8
- from pyspark.ml import Pipeline
9
- from sparknlp.pretrained import PretrainedPipeline
10
-
11
- # Page configuration
12
- st.set_page_config(
13
- layout="wide",
14
- initial_sidebar_state="auto"
15
- )
16
-
17
- # Custom CSS for styling
18
- st.markdown("""
19
- <style>
20
- .main-title {
21
- font-size: 36px;
22
- color: #4A90E2;
23
- font-weight: bold;
24
- text-align: center;
25
- }
26
- .section {
27
- background-color: #f9f9f9;
28
- padding: 10px;
29
- border-radius: 10px;
30
- margin-top: 10px;
31
- }
32
- .section p, .section ul {
33
- color: #666666;
34
- }
35
- </style>
36
- """, unsafe_allow_html=True)
37
-
38
- @st.cache_resource
39
- def init_spark():
40
- """Initialize Spark NLP."""
41
- return sparknlp.start()
42
-
43
- @st.cache_resource
44
- def create_pipeline(model):
45
- """Create a Spark NLP pipeline for audio processing."""
46
- audio_assembler = AudioAssembler() \
47
- .setInputCol("audio_content") \
48
- .setOutputCol("audio_assembler")
49
-
50
- speech_to_text = Wav2Vec2ForCTC \
51
- .pretrained(model)\
52
- .setInputCols("audio_assembler") \
53
- .setOutputCol("text")
54
-
55
- pipeline = Pipeline(stages=[
56
- audio_assembler,
57
- speech_to_text
58
- ])
59
- return pipeline
60
-
61
- def fit_data(pipeline, fed_data):
62
- """Fit the data into the pipeline and return the transcription."""
63
- data, sampling_rate = librosa.load(fed_data, sr=16000)
64
- data = [float(x) for x in data]
65
-
66
- schema = StructType([
67
- StructField("audio_content", ArrayType(FloatType())),
68
- StructField("sampling_rate", LongType())
69
- ])
70
-
71
- df = pd.DataFrame({
72
- "audio_content": [data],
73
- "sampling_rate": [sampling_rate]
74
- })
75
-
76
- spark_df = spark.createDataFrame(df, schema)
77
- pipeline_df = pipeline.fit(spark_df).transform(spark_df)
78
- return pipeline_df.select("text.result")
79
-
80
- def save_uploadedfile(uploadedfile, path):
81
- """Save the uploaded file to the specified path."""
82
- filepath = os.path.join(path, uploadedfile.name)
83
- with open(filepath, "wb") as f:
84
- if hasattr(uploadedfile, 'getbuffer'):
85
- f.write(uploadedfile.getbuffer())
86
- else:
87
- f.write(uploadedfile.read())
88
-
89
- # Sidebar content
90
- model_list = [
91
- "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman",
92
- "asr_wav2vec2_base_100h_13K_steps",
93
- "asr_wav2vec2_base_100h_ngram",
94
- "asr_wav2vec2_base_100h_by_facebook",
95
- "asr_wav2vec2_base_100h_test",
96
- "asr_wav2vec2_base_960h"
97
- ]
98
-
99
- model = st.sidebar.selectbox(
100
- "Choose the pretrained model",
101
- model_list,
102
- help="For more info about the models visit: https://sparknlp.org/models"
103
- )
104
-
105
- # Main content
106
- st.markdown('<div class="main-title">Speech Recognition With Wav2Vec2ForCTC</div>', unsafe_allow_html=True)
107
- st.markdown('<div class="section"><p>This demo transcribes audio files into texts using the <code>Wav2Vec2ForCTC</code> Annotator and advanced speech recognition models.</p></div>', unsafe_allow_html=True)
108
-
109
- # Reference notebook link in sidebar
110
- st.sidebar.markdown('Reference notebook:')
111
- st.sidebar.markdown("""
112
- <a href="https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/open-source-nlp/17.0.Automatic_Speech_Recognition_Wav2Vec2.ipynb">
113
- <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
114
- </a>
115
- """, unsafe_allow_html=True)
116
-
117
- # Load examples
118
- AUDIO_FILE_PATH = "inputs"
119
- audio_files = sorted(os.listdir(AUDIO_FILE_PATH))
120
-
121
- selected_audio = st.selectbox("Select an audio", audio_files)
122
-
123
- # Creating a simplified Python list of audio file types
124
- audio_file_types = ["mp3", "flac", "wav", "aac", "ogg", "aiff", "wma", "m4a", "ape", "dsf", "dff", "midi", "mid", "opus", "amr"]
125
- uploadedfile = st.file_uploader("Try it for yourself!", type=audio_file_types)
126
-
127
- if uploadedfile:
128
- selected_audio = f"{AUDIO_FILE_PATH}/{uploadedfile.name}"
129
- save_uploadedfile(uploadedfile, AUDIO_FILE_PATH)
130
- elif selected_audio:
131
- selected_audio = f"{AUDIO_FILE_PATH}/{selected_audio}"
132
-
133
- # Audio playback and transcription
134
- st.subheader("Play Audio")
135
-
136
- with open(selected_audio, 'rb') as audio_file:
137
- audio_bytes = audio_file.read()
138
- st.audio(audio_bytes)
139
-
140
- st.subheader(f"Transcription for {selected_audio}:")
141
-
142
- spark = init_spark()
143
- pipeline = create_pipeline(model)
144
- output = fit_data(pipeline, selected_audio)
145
-
146
- st.text(output.first().result[0].strip())
 
 
1
+ import streamlit as st
2
+ import sparknlp
3
+ import os
4
+ import pandas as pd
5
+ import librosa
6
+
7
+ from sparknlp.base import *
8
+ from sparknlp.annotator import *
9
+ from pyspark.ml import Pipeline
10
+ from sparknlp.pretrained import PretrainedPipeline
11
+
12
# Page configuration
# Must run before any other Streamlit call in the script.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="auto"
)

# Custom CSS for styling
# Injected once at startup; styles the .main-title and .section divs
# rendered further down via st.markdown(..., unsafe_allow_html=True).
st.markdown("""
<style>
    .main-title {
        font-size: 36px;
        color: #4A90E2;
        font-weight: bold;
        text-align: center;
    }
    .section {
        background-color: #f9f9f9;
        padding: 10px;
        border-radius: 10px;
        margin-top: 10px;
    }
    .section p, .section ul {
        color: #666666;
    }
</style>
""", unsafe_allow_html=True)
38
+
39
@st.cache_resource
def init_spark():
    """Start (or attach to) the Spark NLP session, cached for the app lifetime."""
    session = sparknlp.start()
    return session
43
+
44
@st.cache_resource
def create_pipeline(model):
    """Build a two-stage Spark NLP pipeline: raw audio -> transcribed text.

    The pipeline is cached per model name so switching models in the UI
    does not re-download an already-loaded checkpoint.
    """
    assembler = (
        AudioAssembler()
        .setInputCol("audio_content")
        .setOutputCol("audio_assembler")
    )

    recognizer = (
        Wav2Vec2ForCTC.pretrained(model)
        .setInputCols("audio_assembler")
        .setOutputCol("text")
    )

    return Pipeline(stages=[assembler, recognizer])
61
+
62
def fit_data(pipeline, fed_data):
    """Transcribe the audio file at *fed_data* with *pipeline*.

    Loads the audio resampled to 16 kHz, wraps it in a single-row Spark
    DataFrame, and returns a DataFrame with the `text.result` column.
    """
    samples, rate = librosa.load(fed_data, sr=16000)
    audio = [float(v) for v in samples]

    schema = StructType([
        StructField("audio_content", ArrayType(FloatType())),
        StructField("sampling_rate", LongType())
    ])

    frame = pd.DataFrame({
        "audio_content": [audio],
        "sampling_rate": [rate]
    })

    # NOTE(review): relies on the module-level `spark` session created at the
    # bottom of this script — fit_data must only be called after init_spark().
    spark_df = spark.createDataFrame(frame, schema)
    fitted = pipeline.fit(spark_df)
    return fitted.transform(spark_df).select("text.result")
80
+
81
def save_uploadedfile(uploadedfile, path):
    """Persist an uploaded file object into directory *path* under its own name."""
    destination = os.path.join(path, uploadedfile.name)
    # Streamlit's UploadedFile exposes getbuffer(); fall back to read()
    # for plainer file-like objects.
    if hasattr(uploadedfile, 'getbuffer'):
        payload = uploadedfile.getbuffer()
    else:
        payload = uploadedfile.read()
    with open(destination, "wb") as out:
        out.write(payload)
89
+
90
# Sidebar content
# Pretrained Wav2Vec2 ASR model names; each is passed to create_pipeline()
# and resolved by Wav2Vec2ForCTC.pretrained().
model_list = [
    "asr_wav2vec2_large_xlsr_53_english_by_jonatasgrosman",
    "asr_wav2vec2_base_100h_13K_steps",
    "asr_wav2vec2_base_100h_ngram",
    "asr_wav2vec2_base_100h_by_facebook",
    "asr_wav2vec2_base_100h_test",
    "asr_wav2vec2_base_960h"
]

# Model picker; the chosen name feeds create_pipeline() below.
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    model_list,
    help="For more info about the models visit: https://sparknlp.org/models"
)
105
+
106
# Main content
# Title and description, styled by the CSS classes injected above.
st.markdown('<div class="main-title">Speech Recognition With Wav2Vec2ForCTC</div>', unsafe_allow_html=True)
st.markdown('<div class="section"><p>This demo transcribes audio files into texts using the <code>Wav2Vec2ForCTC</code> Annotator and advanced speech recognition models.</p></div>', unsafe_allow_html=True)

# Reference notebook link in sidebar
st.sidebar.markdown('Reference notebook:')
st.sidebar.markdown("""
<a href="https://githubtocolab.com/JohnSnowLabs/spark-nlp-workshop/blob/master/open-source-nlp/17.0.Automatic_Speech_Recognition_Wav2Vec2.ipynb">
    <img src="https://colab.research.google.com/assets/colab-badge.svg" style="zoom: 1.3" alt="Open In Colab"/>
</a>
""", unsafe_allow_html=True)
117
+
118
# Load examples
# Bundled example clips; assumes an "inputs" directory exists next to this
# script — TODO confirm it ships with the Space/repo.
AUDIO_FILE_PATH = "inputs"
audio_files = sorted(os.listdir(AUDIO_FILE_PATH))

selected_audio = st.selectbox("Select an audio", audio_files)

# Creating a simplified Python list of audio file types
audio_file_types = ["mp3", "flac", "wav", "aac", "ogg", "aiff", "wma", "m4a", "ape", "dsf", "dff", "midi", "mid", "opus", "amr"]
uploadedfile = st.file_uploader("Try it for yourself!", type=audio_file_types)

# An upload takes precedence over the dropdown choice; in both branches
# `selected_audio` ends up as a path under AUDIO_FILE_PATH.
if uploadedfile:
    selected_audio = f"{AUDIO_FILE_PATH}/{uploadedfile.name}"
    save_uploadedfile(uploadedfile, AUDIO_FILE_PATH)
elif selected_audio:
    selected_audio = f"{AUDIO_FILE_PATH}/{selected_audio}"
133
+
134
# Audio playback and transcription
st.subheader("Play Audio")

# Stream the raw bytes to the browser player; format is inferred by st.audio.
with open(selected_audio, 'rb') as audio_file:
    audio_bytes = audio_file.read()
    st.audio(audio_bytes)

st.subheader(f"Transcription for {selected_audio}:")

# Start Spark, build the pipeline for the chosen model, then transcribe.
spark = init_spark()
pipeline = create_pipeline(model)
output = fit_data(pipeline, selected_audio)

# `output` is a one-row DataFrame; result[0] holds the transcription string.
st.text(output.first().result[0].strip())