clr committed on
Commit
3c81006
·
1 Parent(s): a2f5f66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -22
app.py CHANGED
@@ -53,20 +53,11 @@ def f1(langname,lang_aligner):
53
  elif langname =="Faroese":
54
  ds = datas.ds_f
55
 
56
-
57
- #fig = plt.figure(figsize=(10,4))
58
- #plt.axline((0,0),slope=1,color="darkgray")
59
- #plt.xlabel("Vowel length (ms)")
60
- #plt.ylabel("Consonant length (ms)")
61
-
62
-
63
  maxdat=len(ds)
64
 
65
  ds = ds.select([random.randint(maxdat-1)])
66
- #print([th for th in ds.sample()])
67
  sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
68
  transcript = ds['normalized_text'][0]
69
- #print('PLACE A:',lang_aligner)
70
  return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
71
 
72
 
@@ -74,7 +65,19 @@ bl = gr.Blocks()
74
 
75
  with bl:
76
 
77
- lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language")#, info="Loading the dataset takes some time")
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
80
 
@@ -84,8 +87,8 @@ with bl:
84
 
85
 
86
  with gr.Row():
87
- btn1 = gr.Button(value="The random prosody button")
88
- btn1.style(full_width=False, size="sm")
89
  audio1 = gr.Audio(interactive=False)
90
 
91
  pl1 = gr.Plot()
@@ -101,21 +104,20 @@ with bl:
101
  gr.Markdown(
102
  """
103
  # ABOUT
104
- This is a work-in-progress demo.
105
-
106
- Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus, and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
107
-
108
- After you select a language, a few example sentences from the corpus are displayed.
109
-
110
- Click the button to view time-aligned prosody information for a random sentence - this could be any sentence, not only one of the ones shown above.
111
 
112
- [ABOUT REAPER PITCH TRACKING - TODO]
 
113
 
114
- [ABOUT RMSE INTENSITY - TODO]
 
115
 
116
  [ABOUT CTC ALIGNMENT - TODO]
117
 
118
- caitlinr@ru.is / https://github.com/catiR/
 
 
119
  """
120
  )
121
 
 
53
  elif langname =="Faroese":
54
  ds = datas.ds_f
55
 
 
 
 
 
 
 
 
56
  maxdat=len(ds)
57
 
58
  ds = ds.select([random.randint(maxdat-1)])
 
59
  sound_path = ds['audio'][0]['path'] # audio 0 array is the audio data itself
60
  transcript = ds['normalized_text'][0]
 
61
  return (graph.align_and_graph(sound_path,transcript,lang_aligner),sound_path)
62
 
63
 
 
65
 
66
  with bl:
67
 
68
+ with gr.Row():
69
+ gr.Markdown(
70
+ """
71
+ # Demo under construction
72
+ ## 1. Choose a language to load
73
+ ## 2. See a small sample of the selected corpus
74
+ ## 3. Click the button below to view time-aligned prosody information for a random example (from the whole corpus, not necessarily the shown sample)
75
+
76
+ Pitch is shown in dark blue and loudness is the light orange line.
77
+ The pitch estimation, and the time-alignment of words to audio, are completely automated and there will be some inaccuracy.
78
+ More information below.
79
+ """ )
80
+ lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Language")#, info="Loading the dataset takes some time")
81
 
82
  align_func = gr.State()#value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))
83
 
 
87
 
88
 
89
  with gr.Row():
90
+ btn1 = gr.Button(value="CLICK HERE")
91
+ btn1.style(full_width=False)
92
  audio1 = gr.Audio(interactive=False)
93
 
94
  pl1 = gr.Plot()
 
104
  gr.Markdown(
105
  """
106
  # ABOUT
107
+
108
+ The Icelandic corpus is [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr), and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).
 
 
 
 
 
109
 
110
+ ### Pitch tracking (F0 estimation)
111
+ Estimated pitch is shown in blue on the graphs, as tracked by [REAPER](https://github.com/google/REAPER).
112
 
113
+ ### Intensity
114
+ The orange line is root mean squared energy, which reflects loudness and is also a good indication of syllable placement, as it should line up with vowels and similar sounds.
115
 
116
  [ABOUT CTC ALIGNMENT - TODO]
117
 
118
+ This is a work-in-progress basic demo for automatic prosodic annotation in Faroese and Icelandic.
119
+ Contact caitlinr@ru.is / https://github.com/catiR/ when things break, or with ideas/suggestions about how to apply this.
120
+ The source code is available under the Files tab at the top of the Space.
121
  """
122
  )
123