File size: 1,061 Bytes
d5ee97c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
#pragma once
#include "ext/CppFlow/include/Model.h"
#include "VoxCommon.hpp"
/*
Wrapper that holds a FastSpeech2 model (through the CppFlow Model class) and exposes
mel spectrogram inference on it.

NOTE(review): this class holds a raw Model* and declares a destructor, but declares no
copy/move operations. If the destructor releases the model (the implementation is not
visible from this header), copying an instance would be unsafe - confirm, and consider
deleting or defining the copy/move operations.
*/
class FastSpeech2
{
private:
    // Pointer to the underlying model. Defaults to nullptr so that no constructor path can
    // leave it holding an indeterminate value.
    Model* FastSpeech = nullptr;

public:
    /*
    Default constructor.
    Presumably leaves the model unloaded until Initialize() is called - confirm in the implementation.
    */
    FastSpeech2();

    /*
    Construct from an exported SavedModel folder.
    -> SavedModelFolder: Folder containing the exported SavedModel (same meaning as in Initialize())
    Presumably forwards to Initialize() - confirm in the implementation. No success flag is
    returned from a constructor, so use Initialize() directly when the result must be checked.
    */
    FastSpeech2(const std::string& SavedModelFolder);

    /*
    Initialize and load the model
    -> SavedModelFolder: Folder where the .pb, variables, and other characteristics of the exported SavedModel are located
    <- Returns: (bool)Success
    */
    bool Initialize(const std::string& SavedModelFolder);

    /*
    Do inference on a FastSpeech2 model.
    -> InputIDs: Input IDs of tokens for inference
    -> SpeakerID: ID of the speaker in the model to do inference on. If single speaker, always leave at 0. If multispeaker, refer to your model.
    -> Speed, Energy, F0: Parameters for FS2 inference. Leave at 1.f for defaults
    -> EmotionID: ID of the emotion to do inference with. Defaults to -1, which presumably means
       "no emotion input" - confirm against the implementation and your model.
    <- Returns: TFTensor<float> with shape {1,<len of mel in frames>,80} containing contents of mel spectrogram.
    */
    TFTensor<float> DoInference(const std::vector<int32_t>& InputIDs,
                                int32_t SpeakerID = 0,
                                float Speed = 1.f,
                                float Energy = 1.f,
                                float F0 = 1.f,
                                int32_t EmotionID = -1);

    // Destructor. Defined in the implementation file; presumably releases the loaded model - confirm there.
    ~FastSpeech2();
};
|