File size: 4,078 Bytes
744c1ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
import gradio as gr
import numpy as np
import librosa
import hashlib
import json
import os
from scipy.io.wavfile import write as wav_write
from check import transcibe, estimate_audio_quality
def visible():
    """Return visibility updates for the two hidden widgets.

    Used as the handler that reveals the transcription box and the
    "Save Sample" button once a sample has been checked.
    """
    return tuple(gr.update(visible=True) for _ in range(2))
def check(audio: gr.Audio):
    """Analyze a recorded/uploaded clip: estimate quality and transcribe it.

    Parameters
    ----------
    audio : tuple
        The ``(sample_rate, samples)`` pair delivered by ``gr.Audio``.

    Returns
    -------
    tuple[str, str]
        The transcription and a markdown-formatted quality report.
    """
    sr, samples = audio
    samples = samples.astype(np.float32)
    # Down-mix stereo to mono. Stereo input has shape (n_samples, 2),
    # i.e. ndim == 2 — the original test (len(shape) > 2) never fired.
    if samples.ndim > 1 and samples.shape[1] > 1:
        samples = np.mean(samples, axis=1)
    quality, quality_score, features = estimate_audio_quality(samples, sr)
    # The ASR model expects 16 kHz input.
    audio_resampled = librosa.resample(samples, orig_sr=sr, target_sr=16000)
    transcription = transcibe(audio_resampled, 16000)
    # Typos fixed in the user-facing report ("Qualtiy" -> "Quality",
    # "Feautes" -> "Features").
    check_result = f"""
### Results
Quality (0-1):{quality_score} - {quality}
Features:
"""
    for key, value in features.items():
        check_result += f"""
- {key}: {round(value,3)}
"""
    return transcription, check_result
def generate_sample_id(audio, transcription):
    """Derive a short, deterministic ID for a sample.

    Hashes the raw audio bytes together with the transcription text and
    returns the first 10 hex characters of the SHA-256 digest, so the
    same (audio, transcription) pair always maps to the same ID.
    """
    # ndarray.tostring() was deprecated and removed in NumPy 2.0;
    # tobytes() returns the identical byte payload.
    combined = f"{audio.tobytes()}{transcription}"
    sample_id = hashlib.sha256(combined.encode('utf-8')).hexdigest()
    return sample_id[:10]  # first 10 hex chars are plenty for uniqueness here
def save_sample(audio, transcription, check_result):
    """Persist one dataset sample (wav + transcription + features) to disk.

    Parameters
    ----------
    audio : tuple | numpy.ndarray
        Either the ``(sample_rate, samples)`` pair delivered by ``gr.Audio``
        (as ``check()`` receives it) or a bare array assumed to be 16 kHz.
    transcription : str
        The (possibly user-edited) transcription text.
    check_result : str
        The markdown quality report; serialized as JSON alongside the audio.
    """
    # gr.Audio delivers (sr, ndarray); the original code passed the tuple
    # straight into generate_sample_id()/wav_write(), which crashes.
    if isinstance(audio, tuple):
        sr, samples = audio
    else:
        sr, samples = 16000, audio
    sample_id = generate_sample_id(samples, transcription)
    directory = f"{sample_id}_data"
    # exist_ok avoids the check-then-create race of the original LBYL form.
    os.makedirs(directory, exist_ok=True)
    # Save the audio at its true sample rate (the clip was never resampled
    # before reaching this handler, so hard-coding 16 kHz was wrong).
    audio_filename = os.path.join(directory, f"{sample_id}.wav")
    wav_write(audio_filename, sr, samples)
    # Save the transcription as a .txt file.
    transcription_filename = os.path.join(directory, f"{sample_id}_transcription.txt")
    with open(transcription_filename, 'w') as f:
        f.write(transcription)
    # Save the check_result report as a JSON file.
    check_result_filename = os.path.join(directory, f"{sample_id}_features.json")
    with open(check_result_filename, 'w') as f:
        json.dump(check_result, f)
# --- Gradio UI: layout and event wiring ------------------------------------
with gr.Blocks() as app:
    gr.Markdown("# Open ASR Dataset")
    gr.Markdown(" ")
    gr.Markdown("This App is an effort to collectively crowdsource a new Dataset for the ASR community")
    # gr.Markdown("You can Access the Dataset (here)[LINK]")
    gr.Markdown("The Dataset will be updated every 100 created samples")
    gr.Markdown(" ")
    gr.Markdown("Create a New Sample")
    # Audio input: file upload or direct microphone recording.
    new_audio = gr.Audio(
        label = "Add Audio",
        sources=['upload','microphone']
    )
    # Sample-metadata radios. NOTE(review): these values are collected in the
    # UI only — none of them are wired into the click handlers below, so they
    # are currently never saved with the sample. Confirm whether intentional.
    with gr.Row():
        language = gr.Radio(
            label="Spoken Language",
            choices=["English","French","Spanish","German","Italian", "Chinese", "Japanese", "Arabic", "Russian", "Portuguese"],
            value= "English",
            scale=4
        )
        bckgrnd_noise = gr.Radio(
            label="Background Noise Level",
            choices=["Quiet", "Moderate", "Noisy"],
            value="Moderate",
            scale=2
        )
        recording_env = gr.Radio(
            label="Recording Environment",
            choices=["Studio", "Home", "Outdoors", "Office"],
            value="Home",
            scale=2
        )
    check_sample_btn = gr.Button(
        value="Check Sample",
        variant="secondary",
        size="sm"
    )
    with gr.Row():
        # Markdown pane for the quality report produced by check().
        check_result = gr.Markdown()
        # Editable transcription box; hidden until a sample is checked.
        transcription = gr.TextArea(
            label="Transcription",
            visible=False,
            interactive=True
        )
    # Hidden until a sample is checked (revealed by visible()).
    save_sample_button = gr.Button(
        value="Save Sample",
        variant="primary",
        size="sm",
        scale=1,
        visible=False
    )
    # Both handlers fire on the same click: one reveals the hidden widgets,
    # the other runs the quality analysis + transcription.
    check_sample_btn.click(visible, outputs=[transcription, save_sample_button])
    check_sample_btn.click(check, inputs=[new_audio], outputs=[transcription, check_result])
    # Saving reads the (possibly user-edited) transcription back from the UI.
    save_sample_button.click(save_sample, inputs=[new_audio, transcription, check_result])
app.launch()