from typing import Any, Optional

import numpy as np
import parselmouth

from .f0 import F0Predictor


class PM(F0Predictor):
    """F0 predictor backed by parselmouth's ``Sound.to_pitch_ac``.

    The constructor arguments are forwarded unchanged to ``F0Predictor``.
    ``compute_f0`` reads ``self.hop_length``, ``self.sampling_rate``,
    ``self.f0_min`` and ``self.f0_max`` (presumably stored by the base
    class -- confirm in ``.f0``).
    """

    def __init__(self, hop_length=512, f0_min=50, f0_max=1100, sampling_rate=44100):
        super().__init__(hop_length, f0_min, f0_max, sampling_rate)

    def compute_f0(
        self,
        wav: np.ndarray,
        p_len: Optional[int] = None,
        filter_radius: Optional[int] = None,
    ):
        """Estimate F0 for ``wav`` and pad the track to ``p_len`` frames.

        Args:
            wav: Audio samples; ``wav.shape[0]`` is used as the sample count.
            p_len: Target number of frames. When ``None`` it defaults to
                ``wav.shape[0] // self.hop_length``; otherwise it must be
                within 3 frames of that value.
            filter_radius: Accepted for signature compatibility; not used by
                this method.

        Returns:
            The first element of ``self._interpolate_f0(f0)`` (that helper is
            defined outside this file; its exact return shape is not visible
            here).

        Raises:
            AssertionError: If an explicit ``p_len`` differs from the
                hop-derived frame count by 4 or more ("pad length error").
        """
        hop_frames = wav.shape[0] // self.hop_length
        if p_len is None:
            p_len = hop_frames
        else:
            assert abs(p_len - hop_frames) < 4, "pad length error"

        # Hop duration in milliseconds; converted back to seconds at the call
        # site. The round trip is kept so the value passed on is unchanged.
        step_ms = self.hop_length / self.sampling_rate * 1000

        sound = parselmouth.Sound(wav, self.sampling_rate)
        pitch = sound.to_pitch_ac(
            time_step=step_ms / 1000,
            voicing_threshold=0.6,
            pitch_floor=self.f0_min,
            pitch_ceiling=self.f0_max,
        )
        f0 = pitch.selected_array["frequency"]

        # Split the missing frames between both ends; the leading side gets
        # the extra frame when the shortfall is odd.
        shortfall = p_len - len(f0)
        lead = (shortfall + 1) // 2
        trail = shortfall - lead
        if lead > 0 or trail > 0:
            f0 = np.pad(f0, [[lead, trail]], mode="constant")

        return self._interpolate_f0(f0)[0]