|
import gradio as gr |
|
import subprocess,os |
|
from datasets import load_dataset, Audio |
|
import corpora |
|
import ctcalign,graph |
|
|
|
|
|
import matplotlib |
|
matplotlib.use('Agg') |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def setup():
    """Download the REAPER sources from GitHub and build them in ``./REAPER/build``.

    Shells out to ``wget``, ``unzip``, ``mv``, ``rm``, ``mkdir``, ``cmake`` and
    ``make``. The captured stdout of the download, configure and build steps
    is printed, bracketed by a ``pwd`` at the start and an ``ls -la`` of the
    starting directory at the end.

    Each build step is pointed at its directory through ``cwd=`` instead of
    ``os.chdir``, so the working directory of the running process is never
    changed, even when a step raises part-way through.

    Exit codes of the external commands are not checked.

    Raises:
        FileNotFoundError: if an external tool is not installed, or if the
            ``REAPER`` / ``REAPER/build`` directory does not exist when a
            step has to run inside it.
    """
    source_dir = "./REAPER"
    build_dir = "./REAPER/build"

    r0 = subprocess.run(["pwd"], capture_output=True, text=True)
    print('PWD::', r0.stdout)

    # Fetch and unpack the sources; the archive unpacks to "REAPER-master".
    r1 = subprocess.run(
        ["wget", "https://github.com/google/REAPER/archive/refs/heads/master.zip"],
        capture_output=True,
        text=True,
    )
    print(r1.stdout)
    subprocess.run(["unzip", "./master.zip"])
    subprocess.run(["mv", "REAPER-master", "REAPER"])
    subprocess.run(["rm", "./master.zip"])

    # Out-of-source CMake build: REAPER/build is configured against "..".
    subprocess.run(["mkdir", "build"], cwd=source_dir)
    r2 = subprocess.run(["cmake", ".."], cwd=build_dir, capture_output=True, text=True)
    print(r2.stdout)
    r3 = subprocess.run(["make"], cwd=build_dir, capture_output=True, text=True)
    print(r3.stdout)

    # Listing of the starting directory, for the logs.
    r9 = subprocess.run(["ls", "-la"], capture_output=True, text=True)
    print('LS::', r9.stdout)
|
|
|
|
|
|
|
|
|
|
|
def load_lang(langname):
    """Return preview rows and an aligner for the selected language.

    Args:
        langname: ``"Icelandic"`` or ``"Faroese"``.

    Returns:
        A ``(preview, aligner)`` tuple. ``preview`` is the first 10 rows of
        the language's table from ``corpora`` with the ``audio``,
        ``speaker_id`` and ``duration`` columns dropped. ``aligner`` is a
        ``ctcalign.aligner`` built for that language's model.

    Raises:
        ValueError: if ``langname`` is not one of the supported languages.
    """
    if langname == "Icelandic":
        df = corpora.ds_i
        model_path = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
    elif langname == "Faroese":
        df = corpora.ds_f
        # NOTE(review): the repo name suggests a Whisper model, while the
        # aligner below is configured with a '|' word separator and '[PAD]'
        # blank token exactly like the wav2vec2 model above -- confirm that
        # this checkpoint actually works with ctcalign.
        model_path = "carlosdanielhernandezmena/whisper-large-faroese-8k-steps-100h"
    else:
        # Without this branch an unknown name would reach the aligner call
        # with `model_path` unbound and die with an UnboundLocalError.
        raise ValueError(f"Unsupported language: {langname!r}")

    model_word_separator = '|'
    model_blank_token = '[PAD]'
    lang_aligner = ctcalign.aligner(model_path, model_word_separator, model_blank_token)

    # Only a short preview without the listed columns is handed back.
    df = df.drop(columns=['audio', 'speaker_id', 'duration'])
    return (df[:10], lang_aligner)
|
|
|
|
|
def f1(langname, lang_aligner):
    """Build the (currently empty) vowel-vs-consonant length figure.

    Args:
        langname: name of the selected language; not used yet.
        lang_aligner: aligner for the selected language; not used yet.

    Returns:
        A matplotlib figure created with ``figsize=(10, 4)`` that holds a
        dark-gray reference line of slope 1 through the origin and labelled
        axes, with no data plotted.
    """
    # TODO(review): presumably this should sample a random sentence of the
    # selected language and plot its measurements; for now only the empty
    # reference axes are returned.

    # Draw on explicit axes instead of pyplot's implicit "current figure",
    # which is global state shared by every caller in the process.
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.axline((0, 0), slope=1, color="darkgray")
    ax.set_xlabel("Vowel length (ms)")
    ax.set_ylabel("Consonant length (ms)")

    # pyplot keeps a reference to every figure it creates until it is closed,
    # so one figure per button click would pile up in a long-running server.
    # Closing only unregisters it from pyplot; the returned Figure object can
    # still be rendered.
    plt.close(fig)
    return fig
|
|
|
|
|
|
|
|
|
# Top-level Gradio UI definition. Components appear on the page in the order
# they are created inside the `with bl:` context.
bl = gr.Blocks()

with bl:

    # Language selector; its value is the first input of both callbacks below.
    lloadr = gr.Dropdown(["Faroese", "Icelandic"], label="Select a language",value="Icelandic")

    # State slot holding the aligner. It is constructed here, at import time,
    # with the same Icelandic model that load_lang uses, matching the
    # dropdown's default value; load_lang overwrites it on a language change.
    align_func = gr.State(value=ctcalign.aligner(model_path="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h",model_word_separator = '|',model_blank_token = '[PAD]'))

    # NOTE(review): indentation was lost in the copy this was reviewed from;
    # the Row is presumed to contain only the table -- confirm.
    with gr.Row():

        # Read-only table that receives the preview rows returned by load_lang.
        databrowser = gr.DataFrame(wrap=True, max_rows=50, interactive=False, overflow_row_behaviour='paginate')

    btn1 = gr.Button(value="The random prosody button")
    btn1.style(full_width=False, size="sm")

    pl1 = gr.Plot()

    # Button click: call f1(language, aligner) and show its figure in pl1.
    btn1.click(f1, [lloadr,align_func], pl1)

    # Language change: load_lang returns (preview rows, aligner), which are
    # routed to the table and the state slot respectively.
    # NOTE(review): nothing fills the table for the default selection, so it
    # presumably stays empty until the language is changed -- confirm.
    lloadr.change(load_lang,lloadr,[databrowser,align_func])

    # Static "about" text shown below the controls.
    # NOTE(review): the contact line reads "[email protected]", which looks
    # like an e-mail obfuscation placeholder rather than a real address --
    # restore the intended address.
    gr.Markdown(
        """
# ABOUT
This is a work-in-progress demo.

Icelandic uses the [samromur-asr](https://huggingface.co/datasets/language-and-voice-lab/samromur_asr) corpus
and Faroese uses [ravnursson-asr](https://huggingface.co/datasets/carlosdanielhernandezmena/ravnursson_asr).

After you select a language, a few example sentences from the corpus are displayed.

Click the button to view time-aligned prosody information for a random sentence -
this could be any sentence, not only one of the ones shown above.

ABOUT REAPER

ABOUT RMSE

ABOUT CTC ALIGNMENT

CONTACT INFO ETC
[email protected] / https://github.com/catiR/
"""
    )
|
|
|
|
|
# Start the Gradio app only when this file is executed as a script; importing
# the module builds the UI above but does not launch it.
if __name__ == "__main__":
    bl.launch()