Spaces:
Runtime error
Runtime error
Harshad Bhandwaldar
committed on
Commit
·
a606014
1
Parent(s):
0a18830
model added
Browse files
app.py
CHANGED
@@ -10,21 +10,21 @@ model = nemo_asr.models.EncDecCTCModel.from_pretrained(
|
|
10 |
model_name="stt_en_quartznet15x5"
|
11 |
)
|
12 |
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
|
19 |
def speech_file(x):
|
20 |
-
print(x)
|
21 |
text = model.transcribe([f"{x}"])
|
22 |
-
print(text)
|
23 |
return text
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
|
29 |
css = """
|
30 |
.gradio-container {
|
@@ -112,23 +112,23 @@ css = """
|
|
112 |
with gr.Blocks(css = css) as demo:
|
113 |
gr.Markdown(
|
114 |
"""
|
115 |
-
# Speech to Text
|
116 |
-
|
117 |
""")
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
with gr.Tab("Audio File"):
|
123 |
with gr.Row().style(equal_height=True):
|
124 |
audio_input2 = gr.Audio(label="Audio File", type="filepath")
|
125 |
text_output2 = gr.Textbox(label="Transcription", show_label=False)
|
126 |
file_button = gr.Button("Transcribe")
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
gr.HTML('''
|
133 |
<div class="footer">
|
134 |
<p></a>
|
@@ -136,8 +136,8 @@ with gr.Blocks(css = css) as demo:
|
|
136 |
</div>
|
137 |
''')
|
138 |
|
139 |
-
|
140 |
file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
|
141 |
-
|
142 |
|
143 |
demo.launch()
|
|
|
10 |
model_name="stt_en_quartznet15x5"
|
11 |
)
|
12 |
|
13 |
+
def speech_youtube(x):
    """Transcribe the audio track of a YouTube video.

    Args:
        x: URL of the YouTube video, as entered in the "YouTube Link" textbox.

    Returns:
        Whatever ``model.transcribe`` returns for the single downloaded file
        (presumably a list holding one transcription -- confirm against the
        NeMo version in use).

    Raises:
        ValueError: If ``x`` is empty/blank, or the video exposes no
            audio-only stream.
    """
    import tempfile

    url = "" if x is None else str(x).strip()
    if not url:
        raise ValueError("Please provide a YouTube link to transcribe.")

    # pytube.YouTube takes the URL as a plain string, not a list.
    video = pytube.YouTube(url)
    audio = video.streams.get_audio_only()
    if audio is None:
        raise ValueError("No audio-only stream is available for this video.")

    # Download into a throwaway directory so repeated requests do not pile up
    # files (or collide on file names) in the app's working directory.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = audio.download(output_path=workdir)
        # transcribe() is handed a list of paths, matching how speech_file
        # calls it; a bare string would not be treated as one path.
        # NOTE(review): the download is presumably an .mp4 audio container --
        # confirm the model's audio loader can decode it.
        return model.transcribe([audio_path])
|
18 |
|
19 |
def speech_file(x):
    """Transcribe an uploaded audio file.

    Args:
        x: Filesystem path of the uploaded audio, as supplied by the
            "Audio File" component (``type="filepath"``); ``None`` when
            nothing was uploaded.

    Returns:
        Whatever ``model.transcribe`` returns for the single file
        (presumably a list holding one transcription -- confirm against the
        NeMo version in use).

    Raises:
        ValueError: If no file was provided.
    """
    # Without this guard a missing upload would be formatted into the literal
    # path "None" and fail later inside the model with a confusing error.
    if not x:
        raise ValueError("Please upload an audio file to transcribe.")
    return model.transcribe([str(x)])
|
24 |
|
25 |
+
def speech_record(x):
    """Transcribe audio recorded from the microphone.

    Args:
        x: Filesystem path of the recording, as supplied by the "Input Audio"
            microphone component (``type="filepath"``); ``None`` when nothing
            was recorded.

    Returns:
        Whatever ``model.transcribe`` returns for the single recording
        (presumably a list holding one transcription -- confirm against the
        NeMo version in use).

    Raises:
        ValueError: If no recording was provided.
    """
    # Without this guard a missing recording would be formatted into the
    # literal path "None" and fail later inside the model.
    if not x:
        raise ValueError("Please record some audio to transcribe.")
    return model.transcribe([str(x)])
|
28 |
|
29 |
css = """
|
30 |
.gradio-container {
|
|
|
112 |
with gr.Blocks(css = css) as demo:
|
113 |
gr.Markdown(
|
114 |
"""
|
115 |
+
# Speech to Text - NVIDIA QuartzNet15x5 (English)
|
116 |
+
QuartzNet is a Jasper-like network that uses separable convolutions and larger filter sizes. It has comparable accuracy to Jasper while having much fewer parameters. This particular model has 15 blocks each repeated 5 times.
|
117 |
""")
|
118 |
+
with gr.Tab("YouTube"):
|
119 |
+
audio_input = gr.Textbox(label="YouTube Link", placeholder="paste the youtube link here")
|
120 |
+
text_output = gr.Textbox(label="Transcription", show_label=False)
|
121 |
+
youtube_button = gr.Button("Transcribe")
|
122 |
with gr.Tab("Audio File"):
|
123 |
with gr.Row().style(equal_height=True):
|
124 |
audio_input2 = gr.Audio(label="Audio File", type="filepath")
|
125 |
text_output2 = gr.Textbox(label="Transcription", show_label=False)
|
126 |
file_button = gr.Button("Transcribe")
|
127 |
+
with gr.Tab("Record"):
|
128 |
+
with gr.Row().style(equal_height=True):
|
129 |
+
audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
|
130 |
+
text_output3 = gr.Textbox(label="Transcription", show_label=False)
|
131 |
+
rec_button = gr.Button("Transcribe")
|
132 |
gr.HTML('''
|
133 |
<div class="footer">
|
134 |
<p></a>
|
|
|
136 |
</div>
|
137 |
''')
|
138 |
|
139 |
+
youtube_button.click(speech_youtube, inputs=audio_input, outputs=text_output)
|
140 |
file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
|
141 |
+
rec_button.click(speech_record, inputs=audio_input3, outputs=text_output3)
|
142 |
|
143 |
demo.launch()
|