"""Open ASR Dataset — a Gradio app to crowdsource audio/transcription samples.

Flow: the user records/uploads audio, "Check Sample" runs quality estimation
and transcription, then "Save Sample" persists wav + transcription + report.
"""

import hashlib
import json
import os

import gradio as gr
import librosa
import numpy as np
from scipy.io.wavfile import write as wav_write

# NOTE: "transcibe" (sic) is the name exported by the project-local module.
from check import transcibe, estimate_audio_quality


def visible():
    """Reveal the transcription box and the save button after a check runs."""
    return gr.update(visible=True), gr.update(visible=True)


def check(audio: tuple):
    """Estimate quality and transcribe one audio sample.

    Parameters
    ----------
    audio : tuple
        ``(sample_rate, samples)`` pair as produced by ``gr.Audio``.

    Returns
    -------
    tuple[str, str]
        The transcription and a markdown report of the quality features.
    """
    sr, samples = audio
    samples = samples.astype(np.float32)

    # Down-mix stereo (n_samples, n_channels) to mono.
    # BUG FIX: the original tested ``len(shape) > 2``, which can never be
    # true for stereo input (stereo arrays are 2-D), so stereo was never
    # down-mixed. A 2-D array with more than one channel is the real case.
    if samples.ndim == 2 and samples.shape[1] > 1:
        samples = np.mean(samples, axis=1)

    quality, quality_score, features = estimate_audio_quality(samples, sr)

    # The transcription model expects 16 kHz input.
    audio_resampled = librosa.resample(samples, orig_sr=sr, target_sr=16000)
    transcription = transcibe(audio_resampled, 16000)

    # Typo fixes in the user-facing report: "Qualtiy" -> "Quality",
    # "Feautes" -> "Features".
    check_result = f"""
### Results

Quality (0-1): {quality_score} - {quality}

Features:
"""
    for key, value in features.items():
        check_result += f"""
- {key}: {round(value, 3)}
"""
    return transcription, check_result


def generate_sample_id(audio, transcription):
    """Derive a short deterministic ID from the audio bytes and transcription.

    Parameters
    ----------
    audio : np.ndarray
        Raw audio samples.
    transcription : str
        Transcription text for the sample.

    Returns
    -------
    str
        First 10 hex characters of a SHA-256 digest.
    """
    # BUG FIX: ``ndarray.tostring()`` was removed in NumPy 1.23;
    # ``tobytes()`` is the drop-in replacement with the same raw buffer.
    combined = f"{audio.tobytes()}{transcription}"
    sample_id = hashlib.sha256(combined.encode('utf-8')).hexdigest()
    return sample_id[:10]  # Keep the ID short but collision-resistant enough.


def save_sample(audio, transcription, check_result):
    """Persist one dataset sample (wav + transcription + report) to disk.

    Parameters
    ----------
    audio : tuple
        ``(sample_rate, samples)`` pair as delivered by ``gr.Audio``.
    transcription : str
        Possibly user-edited transcription text.
    check_result : str
        Markdown quality report produced by :func:`check`.
    """
    # BUG FIX: ``gr.Audio`` delivers a (sample_rate, ndarray) tuple; the
    # original passed the whole tuple to hashing and to ``wav_write`` (with a
    # hard-coded 16 kHz even though the audio was never resampled), which
    # would raise at runtime. Unpack and use the sample's own rate.
    sr, samples = audio
    sample_id = generate_sample_id(samples, transcription)

    # One directory per sample, named after its ID.
    directory = f"{sample_id}_data"
    os.makedirs(directory, exist_ok=True)

    # Save the audio as a .wav file at its original sample rate.
    audio_filename = os.path.join(directory, f"{sample_id}.wav")
    wav_write(audio_filename, sr, samples)

    # Save the transcription as a .txt file.
    transcription_filename = os.path.join(directory, f"{sample_id}_transcription.txt")
    with open(transcription_filename, 'w') as f:
        f.write(transcription)

    # Save the markdown report as a JSON file (kept for backward
    # compatibility with existing saved samples).
    check_result_filename = os.path.join(directory, f"{sample_id}_features.json")
    with open(check_result_filename, 'w') as f:
        json.dump(check_result, f)


with gr.Blocks() as app:
    gr.Markdown("# Open ASR Dataset")
    gr.Markdown(" ")
    gr.Markdown("This App is an effort to collectively crowdsource a new Dataset for the ASR community")
    # gr.Markdown("You can Access the Dataset (here)[LINK]")
    gr.Markdown("The Dataset will be updated every 100 created samples")
    gr.Markdown(" ")
    gr.Markdown("Create a New Sample")

    new_audio = gr.Audio(
        label="Add Audio",
        sources=['upload', 'microphone']
    )

    with gr.Row():
        language = gr.Radio(
            label="Spoken Language",
            choices=["English", "French", "Spanish", "German", "Italian",
                     "Chinese", "Japanese", "Arabic", "Russian", "Portuguese"],
            value="English",
            scale=4
        )
        bckgrnd_noise = gr.Radio(
            label="Background Noise Level",
            choices=["Quiet", "Moderate", "Noisy"],
            value="Moderate",
            scale=2
        )
        recording_env = gr.Radio(
            label="Recording Environment",
            choices=["Studio", "Home", "Outdoors", "Office"],
            value="Home",
            scale=2
        )

    check_sample_btn = gr.Button(
        value="Check Sample",
        variant="secondary",
        size="sm"
    )

    with gr.Row():
        check_result = gr.Markdown()
        transcription = gr.TextArea(
            label="Transcription",
            visible=False,
            interactive=True
        )

    save_sample_button = gr.Button(
        value="Save Sample",
        variant="primary",
        size="sm",
        scale=1,
        visible=False
    )

    # First reveal the hidden widgets, then run the (slower) check.
    check_sample_btn.click(visible, outputs=[transcription, save_sample_button])
    check_sample_btn.click(check, inputs=[new_audio], outputs=[transcription, check_result])
    save_sample_button.click(save_sample, inputs=[new_audio, transcription, check_result])


if __name__ == "__main__":
    app.launch()