|
""" from https://github.com/CorentinJ/Real-Time-Voice-Cloning """ |
|
|
|
import numpy as np |
|
|
|
|
|
class Utterance:
    """
    Handle to a single utterance stored on disk as an array of frames.

    Only the two file paths are kept on the instance; the frames themselves are read from
    disk every time they are requested.
    """

    def __init__(self, frames_fpath, wave_fpath):
        """
        :param frames_fpath: path to the file holding the utterance frames, in a format that
        np.load can read
        :param wave_fpath: path stored alongside the frames (presumably the source audio
        file); it is never read by this class
        """
        self.frames_fpath = frames_fpath
        self.wave_fpath = wave_fpath

    def get_frames(self):
        """
        Loads the complete utterance from disk.

        :return: the array returned by np.load for frames_fpath
        """
        return np.load(self.frames_fpath)

    def random_partial(self, n_frames):
        """
        Crops the frames into a partial utterance of n_frames

        :param n_frames: The number of frames of the partial utterance
        :return: the partial utterance frames and a tuple indicating the start and end of the
        partial utterance in the complete utterance.
        :raises ValueError: if the utterance holds fewer than n_frames frames
        """
        frames = self.get_frames()
        total_frames = frames.shape[0]

        # Fail with an explicit message instead of numpy's opaque "low >= high" error.
        if n_frames > total_frames:
            raise ValueError(
                "Cannot crop a partial utterance of %d frames from an utterance of %d frames"
                % (n_frames, total_frames)
            )

        # The upper bound of randint is exclusive, hence the +1: without it the last valid
        # window (start == total_frames - n_frames) could never be drawn. This also covers
        # the case total_frames == n_frames, where the only possible start is 0.
        start = np.random.randint(0, total_frames - n_frames + 1)
        end = start + n_frames
        return frames[start:end], (start, end)