File size: 4,078 Bytes
744c1ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import gradio as gr
import numpy as np
import librosa
import hashlib
import json
import os

from scipy.io.wavfile import write as wav_write
from check import transcibe, estimate_audio_quality

def visible():
    """Reveal the transcription box and the save button (post-check)."""
    return tuple(gr.update(visible=True) for _ in range(2))

def check(audio):
    """Run quality estimation and ASR on a recorded/uploaded sample.

    Parameters
    ----------
    audio : tuple
        Gradio ``Audio`` value: ``(sample_rate, samples ndarray)``.

    Returns
    -------
    tuple[str, str]
        The transcription text and a Markdown-formatted quality report.
    """
    sr, data = audio
    data = data.astype(np.float32)
    # Down-mix multi-channel audio to mono. Stereo arrays are 2-D with
    # shape (n_samples, n_channels), so the test is ndim > 1 — the
    # original `len(shape) > 2` could never fire for stereo input.
    if data.ndim > 1 and data.shape[1] > 1:
        data = np.mean(data, axis=1)

    quality, quality_score, features = estimate_audio_quality(data, sr)
    # NOTE(review): assumes the ASR model expects 16 kHz input — confirm
    # against check.transcibe.
    audio_resampled = librosa.resample(data, orig_sr=sr, target_sr=16000)
    transcription = transcibe(audio_resampled, 16000)

    # Markdown report shown in the UI ("Qualtiy"/"Feautes" typos fixed).
    check_result = f"""
    ### Results

    Quality (0-1):{quality_score}   - {quality}

    Features:
    """

    for key, value in features.items():
        check_result += f"""
    - {key}:   {round(value, 3)}
    """

    return transcription, check_result

def generate_sample_id(audio, transcription):
    """Derive a short, deterministic sample ID from audio + transcription.

    Parameters
    ----------
    audio : numpy.ndarray
        Raw audio samples.
    transcription : str
        The sample's transcription text.

    Returns
    -------
    str
        First 10 hex characters of the SHA-256 digest.
    """
    # tobytes() replaces ndarray.tostring(), which was deprecated and
    # removed in NumPy 2.0; the produced bytes are identical.
    combined = f"{audio.tobytes()}{transcription}"

    sample_id = hashlib.sha256(combined.encode('utf-8')).hexdigest()

    return sample_id[:10]  # Short prefix is enough to key a sample dir


def save_sample(audio, transcription, check_result):
    """Persist one dataset sample (wav + transcription + report) to disk.

    Parameters
    ----------
    audio : tuple
        Gradio ``Audio`` value: ``(sample_rate, samples ndarray)``. The
        original code passed the whole tuple to ``generate_sample_id`` /
        ``wav_write``, which both require the array — unpack it first.
    transcription : str
        (Possibly user-edited) transcription text.
    check_result : str
        Markdown quality report produced by ``check``.
    """
    sr, data = audio
    sample_id = generate_sample_id(data, transcription)

    # One directory per sample, keyed by the content hash.
    directory = f"{sample_id}_data"
    os.makedirs(directory, exist_ok=True)  # race-free vs. exists()+makedirs()

    # Write at the true sample rate — the audio was never resampled here,
    # so the previous hard-coded 16000 mislabeled non-16kHz recordings.
    audio_filename = os.path.join(directory, f"{sample_id}.wav")
    wav_write(audio_filename, sr, data)

    transcription_filename = os.path.join(directory, f"{sample_id}_transcription.txt")
    with open(transcription_filename, 'w', encoding='utf-8') as f:
        f.write(transcription)

    # The report is stored as a JSON string (not parsed fields).
    check_result_filename = os.path.join(directory, f"{sample_id}_features.json")
    with open(check_result_filename, 'w', encoding='utf-8') as f:
        json.dump(check_result, f)

# UI flow: upload/record audio -> "Check Sample" runs quality estimation +
# ASR and reveals the editable transcription and save button -> "Save
# Sample" writes wav/txt/json into a per-sample directory.
with gr.Blocks() as app:

    gr.Markdown("# Open ASR Dataset")
    gr.Markdown("   ")
    gr.Markdown("This App is an effort to collectively crowdsource a new Dataset for the ASR community")
    # gr.Markdown("You can Access the Dataset (here)[LINK]")
    gr.Markdown("The Dataset will be updated every 100 created samples")
    gr.Markdown("   ")
    gr.Markdown("Create a New Sample")

    # Audio input: either a file upload or a live microphone recording.
    new_audio = gr.Audio(
        label = "Add Audio",
        sources=['upload','microphone']
    )
    with gr.Row():

        # NOTE(review): these three metadata radios are collected in the UI
        # but are not wired into check()/save_sample() below.
        language = gr.Radio(
            label="Spoken Language",
            choices=["English","French","Spanish","German","Italian", "Chinese", "Japanese", "Arabic", "Russian", "Portuguese"],
            value= "English",
            scale=4
        )

        bckgrnd_noise = gr.Radio(
            label="Background Noise Level",
            choices=["Quiet", "Moderate", "Noisy"],
            value="Moderate",
            scale=2
        )
        
        recording_env = gr.Radio(
            label="Recording Environment",
            choices=["Studio", "Home", "Outdoors", "Office"],
            value="Home",
            scale=2
        )

    check_sample_btn = gr.Button(
        value="Check Sample",
        variant="secondary",
        size="sm"
    )
    
    with gr.Row():

        # Quality report (Markdown) next to the editable transcription.
        check_result = gr.Markdown()

        # Hidden until the first check completes (see visible()).
        transcription = gr.TextArea(
            label="Transcription",
            visible=False,
            interactive=True
        )

    # Hidden until the first check completes (see visible()).
    save_sample_button = gr.Button(
        value="Save Sample",
        variant="primary",
        size="sm",
        scale=1,
        visible=False
    )

    # Two handlers on one click: visible() reveals the output widgets,
    # check() fills them in.
    check_sample_btn.click(visible, outputs=[transcription, save_sample_button])
    check_sample_btn.click(check, inputs=[new_audio], outputs=[transcription, check_result])
    
    save_sample_button.click(save_sample, inputs=[new_audio, transcription, check_result])


app.launch()