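# Summary of what this app does, drawn from the Markdown text in the UI below;
# kept as a module docstring so the file is self-describing.
"""Gradio app for crowdsourcing an open ASR dataset.

Contributors upload or record audio, the sample is transcribed and quality-checked,
and saved samples are written to disk together with their transcription and a
feature report.
"""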
import gradio as gr
import numpy as np
import librosa
import hashlib
import json
import os
from scipy.io.wavfile import write as wav_write
from check import transcibe, estimate_audio_quality
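# `transcibe` (spelled as in the local module) and `estimate_audio_quality` are
# assumed to be provided by a check.py file next to this app; the ASR backend and
# the quality heuristics live there, not in this file.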
def visible():
    # Reveal the transcription box and the Save Sample button once a sample has been checked
    return gr.update(visible=True), gr.update(visible=True)

def check(audio):
    # gr.Audio delivers a (sample_rate, data) tuple
    sr, audio = audio
    # NOTE: samples are not normalised to [-1, 1] here; the helpers imported from
    # check.py are assumed to accept the raw integer-range values as floats.
    audio = audio.astype(np.float32)
    # Down-mix stereo recordings to mono
    if len(audio.shape) > 1 and audio.shape[1] > 1:
        audio = np.mean(audio, axis=1)
    quality, quality_score, features = estimate_audio_quality(audio, sr)
    audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=16000)
    transcription = transcibe(audio_resampled, 16000)
    check_result = f"""
### Results
Quality (0-1): {quality_score} - {quality}
Features:
"""
    for key, value in features.items():
        check_result += f"""
- {key}: {round(value, 3)}
"""
    return transcription, check_result
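# check() returns (transcription_text, markdown_report); the click handler wired up
# below routes them into the Transcription text area and the results Markdown panel.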

def generate_sample_id(audio, transcription):
    # Combine the raw audio bytes and the transcription into a single string
    combined = f"{audio.tobytes()}{transcription}"
    # Generate a hash of the combined string
    sample_id = hashlib.sha256(combined.encode('utf-8')).hexdigest()
    return sample_id[:10]  # Return the first 10 characters of the hash as the ID
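# The ID is simply the first 10 hex characters of a SHA-256 digest over the raw
# audio bytes plus the transcription, so identical submissions map to the same ID.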

def save_sample(audio, transcription, check_result):
    # gr.Audio delivers a (sample_rate, data) tuple
    sr, data = audio
    sample_id = generate_sample_id(data, transcription)
    # Create a directory to save the sample
    directory = f"{sample_id}_data"
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Save the audio as a .wav file at its original sample rate
    audio_filename = os.path.join(directory, f"{sample_id}.wav")
    wav_write(audio_filename, sr, data)
    # Save the transcription as a .txt file
    transcription_filename = os.path.join(directory, f"{sample_id}_transcription.txt")
    with open(transcription_filename, 'w') as f:
        f.write(transcription)
    # Save the check_result as a JSON file
    check_result_filename = os.path.join(directory, f"{sample_id}_features.json")
    with open(check_result_filename, 'w') as f:
        json.dump(check_result, f)
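# Illustrative on-disk layout for a saved sample (sample_id shown as a hypothetical
# placeholder "ab12cd34ef"):
#   ab12cd34ef_data/
#     ab12cd34ef.wav                  audio at its original sample rate
#     ab12cd34ef_transcription.txt    the (possibly edited) transcription text
#     ab12cd34ef_features.json        the Markdown check report, dumped as a JSON string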

with gr.Blocks() as app:

    gr.Markdown("# Open ASR Dataset")
    gr.Markdown(" ")
    gr.Markdown("This app is an effort to collectively crowdsource a new dataset for the ASR community.")
    # gr.Markdown("You can access the Dataset [here](LINK)")
    gr.Markdown("The dataset will be updated after every 100 newly created samples.")
    gr.Markdown(" ")
    gr.Markdown("Create a New Sample")

    new_audio = gr.Audio(
        label="Add Audio",
        sources=['upload', 'microphone']
    )
    with gr.Row():
        language = gr.Radio(
            label="Spoken Language",
            choices=["English", "French", "Spanish", "German", "Italian", "Chinese", "Japanese", "Arabic", "Russian", "Portuguese"],
            value="English",
            scale=4
        )
        bckgrnd_noise = gr.Radio(
            label="Background Noise Level",
            choices=["Quiet", "Moderate", "Noisy"],
            value="Moderate",
            scale=2
        )
        recording_env = gr.Radio(
            label="Recording Environment",
            choices=["Studio", "Home", "Outdoors", "Office"],
            value="Home",
            scale=2
        )

    check_sample_btn = gr.Button(
        value="Check Sample",
        variant="secondary",
        size="sm"
    )
    with gr.Row():
        check_result = gr.Markdown()
        transcription = gr.TextArea(
            label="Transcription",
            visible=False,
            interactive=True
        )

    save_sample_button = gr.Button(
        value="Save Sample",
        variant="primary",
        size="sm",
        scale=1,
        visible=False
    )
    # Wire the two-step flow: checking a sample reveals the editable transcription
    # and the Save Sample button, which then persists the sample to disk.
    check_sample_btn.click(visible, outputs=[transcription, save_sample_button])
    check_sample_btn.click(check, inputs=[new_audio], outputs=[transcription, check_result])
    save_sample_button.click(save_sample, inputs=[new_audio, transcription, check_result])

app.launch()