# Spaces: Sleeping  (page-status banner captured along with the source; not program code)
from pathlib import Path | |
import gradio as gr | |
import pickle | |
import torchaudio | |
import torch | |
from speechbrain.inference.speaker import EncoderClassifier | |
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, collect_chunks | |
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Speaker-embedding extractor, loaded from the "speechbrain/spkrec-ecapa-voxceleb" source.
classifier = EncoderClassifier.from_hparams(source="speechbrain/spkrec-ecapa-voxceleb")

# Pickled estimators that are fed the speaker embedding in estimate_height().
# The "_1" height estimator is used for gender value 1 (male), "_0" for 0 (female).
gender_clf = _load_pickle("gender_classifier.pickle")
male_clf = _load_pickle("height_estimator_1.pickle")
female_clf = _load_pickle("height_estimator_0.pickle")

# Markdown file whose text is shown as the interface's article.
article_md = Path("Description.md")

# Message returned to the UI instead of a prediction when there is no usable audio.
error_message = "No speech detected or signal too short!"
def read_markdown_file(file_path):
    """Return the entire contents of the UTF-8 text file at *file_path*.

    Args:
        file_path: Location of the file to read (anything ``open()`` accepts).

    Returns:
        The file's text as a single string.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return handle.read()
def metric_to_imperial(height):
    """Convert a height in centimetres to a feet-and-inches string.

    The height is rounded to the nearest whole inch before it is split into
    feet and inches, so 180 cm (about 70.87 in) becomes ``5'11"``.

    Args:
        height: Height in centimetres; any value accepted by ``float()``.

    Returns:
        The height formatted as ``<feet>'<inches>"``.
    """
    # Coerce to a built-in float first: round() on it is guaranteed to give a
    # built-in int, so a non-builtin scalar type cannot leak a float remainder
    # (e.g. "5'11.0\"") into the formatted text.
    total_inches = round(float(height) / 2.54)
    # Integer divmod avoids the float division the feet count used to go through.
    feet, inches = divmod(total_inches, 12)
    return f"{feet}'{inches}\""
def get_speech(wav):
    """Return only the parts of *wav* that Silero VAD marks as speech.

    Args:
        wav: Audio samples, as produced by ``read_audio``.

    Returns:
        The speech segments assembled by ``collect_chunks``, or an empty slice
        of *wav* when no speech was detected.
    """
    # NOTE(review): the VAD model is loaded again on every call. Caching it
    # would save time per request, but confirm first that sharing one model
    # instance across requests is safe for this deployment.
    model = load_silero_vad()
    speech_timestamps = get_speech_timestamps(wav, model)
    if not speech_timestamps:
        # With no detected segments there is nothing for collect_chunks to
        # join. Return an empty signal so the caller's length check can report
        # "no speech" instead of the request failing with an exception.
        return wav[:0]
    return collect_chunks(speech_timestamps, wav)
def estimate_height(gender, vad, filepath, imperial):
    """Estimate a speaker's height from an audio file.

    Args:
        gender: ``"Male"`` or ``"Female"`` to force the estimator, or
            ``"Detect"``/``None`` to predict the gender from the audio.
        vad: When truthy, reduce the audio to its speech parts via
            ``get_speech`` before computing the embedding.
        filepath: Path of the audio file, or ``None`` when none was supplied.
        imperial: When truthy, report feet and inches instead of centimetres.

    Returns:
        A string such as ``"Male 180 cm"`` or ``"Female 5'7\""``, or
        ``error_message`` when no file was given or the signal is empty.
    """
    if filepath is None:
        return error_message

    signal = read_audio(filepath)
    if vad:
        signal = get_speech(signal)
    if len(signal) < 1:
        return error_message

    # Drop the leading dimension of the encoder output before handing the
    # embedding to the pickled estimators.
    embedding = torch.squeeze(classifier.encode_batch(signal), 0)

    if gender == "Detect" or gender is None:
        # Take the single prediction, exactly as done for the height below,
        # so the flag is a plain bool and not a whole prediction array.
        # Assumes predict() returns an indexable result with one entry for the
        # one embedding; confirm against the pickled model type.
        is_male = bool(gender_clf.predict(embedding)[0])
    else:
        is_male = gender == "Male"

    height_estimator = male_clf if is_male else female_clf
    # float() guarantees that round() below yields a built-in int, so the text
    # never shows a trailing ".0" regardless of the estimator's scalar type.
    height = float(height_estimator.predict(embedding)[0])

    if imperial:
        formatted = metric_to_imperial(height)
    else:
        formatted = f"{round(height)} cm"
    return f"{'Male' if is_male else 'Female'} {formatted}"
# Visual theme applied to the whole page.
theme = gr.themes.Glass()

with gr.Blocks(theme=theme) as demo:
    # Single-function interface: the four inputs map positionally onto
    # estimate_height(gender, vad, filepath, imperial).
    gr.Interface(
        fn=estimate_height,
        inputs=[
            gr.Radio(
                ["Detect", "Male", "Female"],
                label="Gender of a speaker",
                value="Detect",
            ),
            gr.Checkbox(
                label="VAD",
                info="If there is a lot of silence in your audio, maybe try using VAD",
            ),
            gr.Audio(label="Audio", type="filepath"),
            gr.Checkbox(label="Imperial units"),
        ],
        outputs=[gr.Label(label="Prediction")],
        title="Speaker height estimator",
        description="Demo of estimator trained using [HeightCeleb](https://github.com/stachu86/HeightCeleb) dataset",
        allow_flagging="never",
        article=read_markdown_file(article_md),
    )

# The first positional parameter of launch() is `inline`; it is named here for readability.
demo.launch(inline=False, debug=True)