|
import gradio as gr |
|
import subprocess,os |
|
from datasets import load_dataset, Audio |
|
import datas,ctcalign,graph |
|
from numpy import random |
|
|
|
|
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def _run_logged(cmd, cwd=None):
    """Run *cmd* (optionally inside *cwd*), echo its stdout and log failures.

    A non-zero exit status is reported together with the captured stderr but
    is not raised, so the remaining setup steps still run.

    Returns the ``subprocess.CompletedProcess`` of the command.
    """
    result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    if result.stdout:
        print(result.stdout)
    if result.returncode != 0:
        print('FAILED::', ' '.join(cmd), '::', result.stderr)
    return result


def setup():
    """Download the REAPER sources and build them in ``./REAPER/build``.

    Steps: print the working directory, fetch and unpack the REAPER master
    archive (only when ``./REAPER`` does not exist yet), run ``cmake ..`` and
    ``make`` inside ``./REAPER/build``, then print a directory listing.

    The process working directory is never changed; build commands are run
    with ``cwd=`` instead, so a failure part-way through cannot leave the
    application in the wrong directory.

    Raises:
        FileNotFoundError: if a required executable is missing, or if
            ``./REAPER`` does not exist after the download/unpack steps.
    """
    r0 = subprocess.run(["pwd"], capture_output=True, text=True)
    print('PWD::', r0.stdout)

    reaper_dir = 'REAPER'
    build_dir = os.path.join(reaper_dir, 'build')

    # Fetch only when the sources are not unpacked yet: with an existing
    # REAPER directory, `mv` would nest REAPER-master inside it.
    if not os.path.isdir(reaper_dir):
        _run_logged(["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"])
        _run_logged(["unzip", "./master.zip"])
        _run_logged(["mv", "REAPER-master", reaper_dir])
        _run_logged(["rm", "./master.zip"])

    # os.mkdir raises FileNotFoundError when REAPER/ is missing, i.e. when the
    # download or unpack above did not succeed.
    if not os.path.isdir(build_dir):
        os.mkdir(build_dir)

    _run_logged(["cmake", ".."], cwd=build_dir)
    _run_logged(["make"], cwd=build_dir)

    r9 = subprocess.run(["ls", "-la"], capture_output=True, text=True)
    print('LS::', r9.stdout)
|
|
|
|
|
|
|
# Download and build REAPER at import time, before the UI below is defined.
setup()
|
|
|
def load_lang(langname):
    """Return a small preview of the chosen corpus and its aligner.

    Args:
        langname: ``"Icelandic"`` or ``"Faroese"``.

    Returns:
        A tuple ``(preview, lang_aligner)``: ``preview`` is the first 15 rows
        of the corpus table converted with ``to_pandas()``, without the
        ``audio``, ``speaker_id`` and ``duration`` columns; ``lang_aligner``
        is the matching aligner object taken from ``datas``.

    Raises:
        ValueError: if ``langname`` is not a supported language (for example
            ``None`` when nothing is selected).
    """
    if langname == "Icelandic":
        corpus, lang_aligner = datas.ds_i, datas.a_i
    elif langname == "Faroese":
        corpus, lang_aligner = datas.ds_f, datas.a_f
    else:
        # Without this branch the names below would be unbound and the caller
        # would only see an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported language {langname!r}; choose 'Icelandic' or 'Faroese'."
        )

    preview = corpus.data.to_pandas()
    preview = preview.drop(columns=['audio', 'speaker_id', 'duration'])
    return (preview[:15], lang_aligner)
|
|
|
|
|
def f1(langname, lang_aligner):
    """Align and plot one randomly chosen utterance of the selected corpus.

    Args:
        langname: ``"Icelandic"`` or ``"Faroese"``.
        lang_aligner: aligner object for that language; it is passed through
            unchanged to ``graph.align_and_graph``.

    Returns:
        A tuple ``(plot, sound_path)``: the result of
        ``graph.align_and_graph(sound_path, transcript, lang_aligner)`` and
        the path of the selected audio file.

    Raises:
        ValueError: if ``langname`` is not a supported language (for example
            ``None`` when the button is clicked before a language is chosen),
            or if the corpus is empty.
    """
    if langname == "Icelandic":
        ds = datas.ds_i
    elif langname == "Faroese":
        ds = datas.ds_f
    else:
        raise ValueError(
            f"Unsupported language {langname!r}; choose 'Icelandic' or 'Faroese'."
        )

    maxdat = len(ds)

    # numpy's randint(high) samples from [0, high), so passing the corpus
    # length makes every index 0..maxdat-1 selectable, including the last one.
    ds = ds.select([random.randint(maxdat)])
    sound_path = ds['audio'][0]['path']
    transcript = ds['normalized_text'][0]
    return (graph.align_and_graph(sound_path, transcript, lang_aligner), sound_path)
|
|
|
|
|
# Gradio UI: language selector, corpus preview table, and a button that plots
# a random example.
# NOTE(review): the nesting of the rows below is inferred from statement
# order and blank-line grouping -- confirm against the rendered layout.
bl = gr.Blocks()

with bl:

    # Intro text and the language dropdown.
    with gr.Row():
        gr.Markdown(
"""
# Demo under construction
## 1. Choose a language to load
## 2. See a small sample of the selected corpus
## 3. Click the button below to view time-aligned prosody information for a random example (from the whole corpus, not necessarily the shown sample)

Pitch is shown in dark blue and loudness is the light orange line.
The pitch estimation, and the time-alignment of words to audio, are completely automated and there will be some inaccuracy.
More information below.
""" )
        lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Language")

    # Holds the aligner returned by load_lang so that f1 can receive it.
    align_func = gr.State()

    with gr.Row():

        # Filled with the preview table returned by load_lang.
        databrowser = gr.DataFrame(wrap=True, max_rows=50, interactive=False, overflow_row_behaviour='paginate')


    with gr.Row():
        btn1 = gr.Button(value="CLICK HERE")
        btn1.style(full_width=False)
        audio1 = gr.Audio(interactive=False)

    pl1 = gr.Plot()

    # Button click: f1 gets the selected language and the stored aligner, and
    # its two return values go to the plot and the audio player.
    btn1.click(f1, [lloadr,align_func], [pl1,audio1])




    # Changing the language runs load_lang, which updates the preview table
    # and the stored aligner.
    lloadr.change(load_lang,lloadr,[databrowser,align_func])


    gr.Markdown(
"""
# ABOUT

The Icelandic corpus is [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr), and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

### Pitch tracking (F0 estimation)
Estimated pitch is shown in blue on the graphs, as tracked by [REAPER](https://github.com/google/REAPER).

### Intensity
The orange line is root mean squared energy, which reflects loudness and is also a good indication of syllable placement, as it should line up with vowels and similar sounds.

[ABOUT CTC ALIGNMENT - TODO]

This is a work-in-progress basic demo for automatic prosodic annotation in Faroese and Icelandic.
Contact [email protected] / https://github.com/catiR/ when things break, or with ideas/suggestions about how to apply this.
The source code is available under the Files tab at the top of the Space.
"""
    )
|
|
|
|
|
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    bl.launch()