{
  "_class_name": "TortoiseTTSPipeline",
  "audio_candidate_model": [
    "transformers",
    "ClvpModelForConditionalGeneration"
  ],
  "audio_processor": [
    "transformers",
    "ClvpFeatureExtractor"
  ],
  "tokenizer": [
    "transformers",
    "ClvpTokenizer"
  ],
  "unet": [
    "diffusers.pipelines.tortoise_tts.pipeline_tortoise_tts",
    "TortoiseTTSDenoisingModel"
  ],
  "vocoder": [
    "transformers",
    "UnivNetModel"
  ],
  "autoregressive_random_latent_converter": [
    "diffusers.pipelines.tortoise_tts.modeling_common",
    "RandomLatentConverter"
  ],
  "diffusion_conditioning_encoder": [
    "diffusers.pipelines.tortoise_tts.modeling_diffusion",
    "DiffusionConditioningEncoder"
  ],
  "diffusion_random_latent_converter": [
    "diffusers.pipelines.tortoise_tts.modeling_common",
    "RandomLatentConverter"
  ]
}