Spaces:

clr
/

prosalign

Sleeping

App Files Files Community

clr committed on Mar 30, 2023

Commit

3c81006

1 Parent(s): a2f5f66

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -22

app.py CHANGED Viewed

@@ -53,20 +53,11 @@ def f1(langname,lang_aligner):
     elif langname =="Faroese":
         ds = datas.ds_f
-    #fig = plt.figure(figsize=(10,4))
-    #plt.axline((0,0),slope=1,color="darkgray")
-    #plt.xlabel("Vowel length (ms)")
-    #plt.ylabel("Consonant length (ms)")
     maxdat=len(ds)
     ds = ds.select([random.randint(maxdat-1)])
-    #print([th for th in ds.sample()])
     sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
     transcript = ds['normalized_text'][0]
-    #print('PLACE A:',lang_aligner)
     return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
@@ -74,7 +65,19 @@ bl = gr.Blocks()
 with bl:
-    lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language")#, info="Loading the dataset takes some time")
     align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
@@ -84,8 +87,8 @@ with bl:
     with gr.Row():
-        btn1 = gr.Button(value="The random prosody button")
-        btn1.style(full_width=False, size="sm")
         audio1 = gr.Audio(interactive=False)
     pl1 = gr.Plot()
@@ -101,21 +104,20 @@ with bl:
     gr.Markdown(
         """
     # ABOUT
-    This is a work-in-progress demo.
-    Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
-    After you select a language, a few example sentences from the corpus are displayed.
-    Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.
-    [ABOUT REAPER PITCH TRACKING - TODO]
-    [ABOUT RMSE INTENSITY - TODO]
     [ABOUT CTC ALIGNMENT - TODO]
-    caitlinr@ru.is / https://github.com/catiR/
     """
     )

     elif langname =="Faroese":
         ds = datas.ds_f
     maxdat=len(ds)
     ds = ds.select([random.randint(maxdat-1)])
     sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
     transcript = ds['normalized_text'][0]
     return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
 with bl:
+    with gr.Row():
+        gr.Markdown(
+        """
+        # Demo under construction
+        ## 1. Choose a language to load
+        ## 2. See a small sample of the selected corpus
+        ## 3. Click the button below to view time-aligned prosody information for a random example (from the whole corpus, not necessarily the shown sample)
+        Pitch is shown in dark blue and loudness is the light orange line.
+        The pitch estimation, and the time-alignment of words to audio, are completely automated and there will be some inaccuracy.
+        More information below.
+        """ )
+        lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Language")#, info="Loading the dataset takes some time")
     align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
     with gr.Row():
+        btn1 = gr.Button(value="CLICK HERE")
+        btn1.style(full_width=False)
         audio1 = gr.Audio(interactive=False)
     pl1 = gr.Plot()
     gr.Markdown(
         """
     # ABOUT
+    The Icelandic corpus is [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr), and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
+    ### Pitch tracking (F0 estimation)
+    Estimated pitch is shown in blue on the graphs, as tracked by [REAPER](https://github.com/google/REAPER).
+    ### Intensity
+    The orange line is root mean squared energy, which reflects loudness and is also a good indication of syllable placement, as it should line up with vowels and similar sounds.
     [ABOUT CTC ALIGNMENT - TODO]
+    This is a work-in-progress basic demo for automatic prosodic annotation in Faroese and Icelandic.
+    Contact caitlinr@ru.is / https://github.com/catiR/ when things break, or with ideas/suggestions about how to apply this.
+    The source code is available under the Files tab at the top of the Space.
     """
     )