|
import gradio as gr |
|
import numpy as np |
|
import librosa |
|
import hashlib |
|
import json |
|
import os |
|
|
|
from scipy.io.wavfile import write as wav_write |
|
from check import transcibe, estimate_audio_quality |
|
|
|
def visible():
    """Return two Gradio updates, each setting ``visible=True``.

    Returns:
        A pair of ``gr.update(visible=True)`` objects, one per output
        component of the event this handler is attached to.
    """
    first_update = gr.update(visible=True)
    second_update = gr.update(visible=True)
    return first_update, second_update
|
|
|
def check(audio):
    """Transcribe a recording and build a Markdown quality report for it.

    Args:
        audio: Value delivered by the ``gr.Audio`` input component: a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing has
            been recorded or uploaded yet.

    Returns:
        A ``(transcription, check_result)`` tuple where ``check_result`` is
        a Markdown string listing the quality score, its label and every
        entry of the feature mapping returned by ``estimate_audio_quality``.
    """
    if audio is None:
        # Nothing to analyse; report it instead of failing on the unpacking
        # below.
        return "", "### Results\n\nNo audio provided. Record or upload a sample first."

    sr, samples = audio
    samples = samples.astype(np.float32)
    # NOTE(review): integer PCM input is only cast, not rescaled to [-1, 1];
    # confirm that estimate_audio_quality/transcibe expect this range.

    # Multi-channel audio arrives as a 2-D (samples, channels) array, so any
    # array with more than one dimension is averaged down to mono. (The
    # previous test ``len(shape) > 2`` never matched 2-D input.)
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    quality, quality_score, features = estimate_audio_quality(samples, sr)

    # The transcriber is always fed 16 kHz audio.
    target_sr = 16000
    audio_resampled = librosa.resample(samples, orig_sr=sr, target_sr=target_sr)
    transcription = transcibe(audio_resampled, target_sr)

    report_lines = [
        "### Results",
        "",
        f"Quality (0-1):{quality_score} - {quality}",
        "",
        "Features:",
        "",
    ]
    report_lines.extend(
        f"- {key}: {round(value, 3)}" for key, value in features.items()
    )
    check_result = "\n".join(report_lines)

    return transcription, check_result
|
|
|
def generate_sample_id(audio, transcription):
    """Derive a short deterministic identifier for a sample.

    Args:
        audio: Sample data as a NumPy array, or a ``(sample_rate, samples)``
            tuple/list (the form produced by a ``gr.Audio`` input), in which
            case only the samples contribute to the identifier.
        transcription: Text associated with the audio.

    Returns:
        The first 10 hexadecimal characters of the SHA-256 digest computed
        over the audio bytes and the transcription.
    """
    if isinstance(audio, (tuple, list)):
        _, audio = audio

    # ``tobytes`` replaces the deprecated ``tostring`` and yields the same
    # bytes. The bytes are still formatted into the string (as their repr)
    # so identifiers stay identical to those produced before this change.
    combined = f"{audio.tobytes()}{transcription}"

    sample_id = hashlib.sha256(combined.encode("utf-8")).hexdigest()
    return sample_id[:10]
|
|
|
|
|
def save_sample(audio, transcription, check_result):
    """Write one sample (audio, transcription, check report) to disk.

    A directory named ``<sample_id>_data`` is created relative to the
    current working directory and filled with ``<sample_id>.wav``,
    ``<sample_id>_transcription.txt`` and ``<sample_id>_features.json``.

    Args:
        audio: Either a ``(sample_rate, samples)`` tuple/list (the form
            produced by a ``gr.Audio`` input) or a bare NumPy array, which
            is written with a sample rate of 16000 Hz.
        transcription: Text stored in the transcription file.
        check_result: JSON-serialisable object stored in the features file.

    Raises:
        ValueError: If ``audio`` is ``None``.
    """
    if audio is None:
        raise ValueError("No audio provided; record or upload a sample before saving.")

    if isinstance(audio, (tuple, list)):
        # Keep the recording's real sample rate so the WAV header matches
        # the data; the samples are stored as received (not resampled).
        sample_rate, samples = audio
    else:
        sample_rate, samples = 16000, audio

    sample_id = generate_sample_id(samples, transcription)

    directory = f"{sample_id}_data"
    os.makedirs(directory, exist_ok=True)

    audio_filename = os.path.join(directory, f"{sample_id}.wav")
    wav_write(audio_filename, sample_rate, samples)

    # Explicit UTF-8 so non-ASCII transcriptions do not depend on the
    # platform's default encoding.
    transcription_filename = os.path.join(directory, f"{sample_id}_transcription.txt")
    with open(transcription_filename, "w", encoding="utf-8") as f:
        f.write(transcription)

    check_result_filename = os.path.join(directory, f"{sample_id}_features.json")
    with open(check_result_filename, "w", encoding="utf-8") as f:
        json.dump(check_result, f)
|
|
|
# NOTE(review): the original indentation of this layout was not recoverable;
# the nesting of the buttons relative to the rows below is an assumption and
# should be confirmed against the intended UI.
with gr.Blocks() as app:
    # Header and introduction: one Markdown component per entry, in order.
    for _intro_text in (
        "# Open ASR Dataset",
        " ",
        "This App is an effort to collectively crowdsource a new Dataset for the ASR community",
        "The Dataset will be updated every 100 created samples",
        " ",
        "Create a New Sample",
    ):
        gr.Markdown(_intro_text)

    # Audio input; accepts file upload or microphone recording.
    new_audio = gr.Audio(
        sources=["upload", "microphone"],
        label="Add Audio",
    )

    # Sample metadata selectors.
    # NOTE(review): none of these three values is passed to an event handler
    # below, so the selections are currently not stored anywhere.
    with gr.Row():
        language = gr.Radio(
            choices=[
                "English",
                "French",
                "Spanish",
                "German",
                "Italian",
                "Chinese",
                "Japanese",
                "Arabic",
                "Russian",
                "Portuguese",
            ],
            value="English",
            label="Spoken Language",
            scale=4,
        )
        bckgrnd_noise = gr.Radio(
            choices=["Quiet", "Moderate", "Noisy"],
            value="Moderate",
            label="Background Noise Level",
            scale=2,
        )
        recording_env = gr.Radio(
            choices=["Studio", "Home", "Outdoors", "Office"],
            value="Home",
            label="Recording Environment",
            scale=2,
        )

    check_sample_btn = gr.Button(
        value="Check Sample",
        size="sm",
        variant="secondary",
    )

    # Result area; the transcription box and save button start hidden.
    with gr.Row():
        check_result = gr.Markdown()
        transcription = gr.TextArea(
            label="Transcription",
            interactive=True,
            visible=False,
        )
        save_sample_button = gr.Button(
            value="Save Sample",
            size="sm",
            variant="primary",
            scale=1,
            visible=False,
        )

    # "Check Sample" has two independent listeners: one un-hides the
    # transcription box and save button, the other runs the analysis.
    check_sample_btn.click(
        fn=visible,
        outputs=[transcription, save_sample_button],
    )
    check_sample_btn.click(
        fn=check,
        inputs=[new_audio],
        outputs=[transcription, check_result],
    )

    # "Save Sample" hands the audio, the (editable) transcription and the
    # rendered report to save_sample.
    save_sample_button.click(
        fn=save_sample,
        inputs=[new_audio, transcription, check_result],
    )

app.launch()