Spaces:

Wismut
/

StyleTTS2_Studio

Running

App Files Files Community

Wismut committed on Dec 25, 2024

Commit

0af9841

1 Parent(s): 91c4d57

initial commit

Browse files

Files changed (38) hide show

.gitignore +22 -0
Modules/diffusion/diffusion.py +94 -0
Modules/diffusion/modules.py +693 -0
Modules/diffusion/sampler.py +691 -0
Modules/diffusion/utils.py +82 -0
Modules/hifigan.py +477 -0
Modules/utils.py +14 -0
README.md +10 -1
Utils/ASR/__init__.py +1 -0
Utils/ASR/config.yml +29 -0
Utils/ASR/epoch_00080.pth +3 -0
Utils/ASR/layers.py +354 -0
Utils/ASR/models.py +186 -0
Utils/JDC/__init__.py +1 -0
Utils/JDC/bst.pth +3 -0
Utils/JDC/model.py +190 -0
Utils/PLBERT/config.yml +30 -0
Utils/PLBERT/step_1000000.pth +3 -0
Utils/PLBERT/util.py +42 -0
Utils/config.yml +21 -0
annotated_features.npy +3 -0
app.py +506 -0
espeak_util.py +206 -0
inference.py +315 -0
models.py +611 -0
packages.txt +1 -0
pca/annotated_features.npy +3 -0
pca/annotations.json +1991 -0
pca/generate_pca.py +147 -0
pca/pca_model.pkl +3 -0
pca/voices.json +0 -0
pca_model.pkl +3 -0
pyproject.toml +32 -0
requirements.txt +21 -0
text2speech.py +598 -0
uv.lock +0 -0
voices.json +2840 -0
voices_.json.example +2840 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,22 @@

+random_voices.json
+mydata/
+batch_output/
+.env
+# Omit the DS_Store folder automatically created by macOS
+.DS_Store/
+# python virtual environment folder
+.venv/
+.vscode/
+# process log file
+process_log.txt*
+process_log.txt
+# Python cache
+__pycache__/
+/*/__pycache__/
+/*/*/__pycache__/
+github
+github.pub

Modules/diffusion/diffusion.py ADDED Viewed

	@@ -0,0 +1,94 @@

+from math import pi
+from random import randint
+from typing import Any, Optional, Sequence, Tuple, Union
+import torch
+from einops import rearrange
+from torch import Tensor, nn
+from tqdm import tqdm
+from .utils import *
+from .sampler import *
+"""
+Diffusion Classes (generic for 1d data)
+"""
+class Model1d(nn.Module):
+    def __init__(self, unet_type: str = "base", **kwargs):
+        super().__init__()
+        diffusion_kwargs, kwargs = groupby("diffusion_", kwargs)
+        self.unet = None
+        self.diffusion = None
+    def forward(self, x: Tensor, **kwargs) -> Tensor:
+        return self.diffusion(x, **kwargs)
+    def sample(self, *args, **kwargs) -> Tensor:
+        return self.diffusion.sample(*args, **kwargs)
+"""
+Audio Diffusion Classes (specific for 1d audio data)
+"""
+def get_default_model_kwargs():
+    return dict(
+        channels=128,
+        patch_size=16,
+        multipliers=[1, 2, 4, 4, 4, 4, 4],
+        factors=[4, 4, 4, 2, 2, 2],
+        num_blocks=[2, 2, 2, 2, 2, 2],
+        attentions=[0, 0, 0, 1, 1, 1, 1],
+        attention_heads=8,
+        attention_features=64,
+        attention_multiplier=2,
+        attention_use_rel_pos=False,
+        diffusion_type="v",
+        diffusion_sigma_distribution=UniformDistribution(),
+    )
+def get_default_sampling_kwargs():
+    return dict(sigma_schedule=LinearSchedule(), sampler=VSampler(), clamp=True)
+class AudioDiffusionModel(Model1d):
+    def __init__(self, **kwargs):
+        super().__init__(**{**get_default_model_kwargs(), **kwargs})
+    def sample(self, *args, **kwargs):
+        return super().sample(*args, **{**get_default_sampling_kwargs(), **kwargs})
+class AudioDiffusionConditional(Model1d):
+    def __init__(
+        self,
+        embedding_features: int,
+        embedding_max_length: int,
+        embedding_mask_proba: float = 0.1,
+        **kwargs,
+    ):
+        self.embedding_mask_proba = embedding_mask_proba
+        default_kwargs = dict(
+            **get_default_model_kwargs(),
+            unet_type="cfg",
+            context_embedding_features=embedding_features,
+            context_embedding_max_length=embedding_max_length,
+        )
+        super().__init__(**{**default_kwargs, **kwargs})
+    def forward(self, *args, **kwargs):
+        default_kwargs = dict(embedding_mask_proba=self.embedding_mask_proba)
+        return super().forward(*args, **{**default_kwargs, **kwargs})
+    def sample(self, *args, **kwargs):
+        default_kwargs = dict(
+            **get_default_sampling_kwargs(),
+            embedding_scale=5.0,
+        )
+        return super().sample(*args, **{**default_kwargs, **kwargs})

Modules/diffusion/modules.py ADDED Viewed

	@@ -0,0 +1,693 @@

+from math import floor, log, pi
+from typing import Any, List, Optional, Sequence, Tuple, Union
+from .utils import *
+import torch
+import torch.nn as nn
+from einops import rearrange, reduce, repeat
+from einops.layers.torch import Rearrange
+from einops_exts import rearrange_many
+from torch import Tensor, einsum
+"""
+Utils
+"""
+class AdaLayerNorm(nn.Module):
+    def __init__(self, style_dim, channels, eps=1e-5):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        self.fc = nn.Linear(style_dim, channels*2)
+    def forward(self, x, s):
+        x = x.transpose(-1, -2)
+        x = x.transpose(1, -1)
+        h = self.fc(s)
+        h = h.view(h.size(0), h.size(1), 1)
+        gamma, beta = torch.chunk(h, chunks=2, dim=1)
+        gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1)
+        x = F.layer_norm(x, (self.channels,), eps=self.eps)
+        x = (1 + gamma) * x + beta
+        return x.transpose(1, -1).transpose(-1, -2)
+class StyleTransformer1d(nn.Module):
+    def __init__(
+        self,
+        num_layers: int,
+        channels: int,
+        num_heads: int,
+        head_features: int,
+        multiplier: int,
+        use_context_time: bool = True,
+        use_rel_pos: bool = False,
+        context_features_multiplier: int = 1,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+        context_features: Optional[int] = None,
+        context_embedding_features: Optional[int] = None,
+        embedding_max_length: int = 512,
+    ):
+        super().__init__()
+        self.blocks = nn.ModuleList(
+            [
+                StyleTransformerBlock(
+                    features=channels + context_embedding_features,
+                    head_features=head_features,
+                    num_heads=num_heads,
+                    multiplier=multiplier,
+                    style_dim=context_features,
+                    use_rel_pos=use_rel_pos,
+                    rel_pos_num_buckets=rel_pos_num_buckets,
+                    rel_pos_max_distance=rel_pos_max_distance,
+                )
+                for i in range(num_layers)
+            ]
+        )
+        self.to_out = nn.Sequential(
+            Rearrange("b t c -> b c t"),
+            nn.Conv1d(
+                in_channels=channels + context_embedding_features,
+                out_channels=channels,
+                kernel_size=1,
+            ),
+        )
+        use_context_features = exists(context_features)
+        self.use_context_features = use_context_features
+        self.use_context_time = use_context_time
+        if use_context_time or use_context_features:
+            context_mapping_features = channels + context_embedding_features
+            self.to_mapping = nn.Sequential(
+                nn.Linear(context_mapping_features, context_mapping_features),
+                nn.GELU(),
+                nn.Linear(context_mapping_features, context_mapping_features),
+                nn.GELU(),
+            )
+        if use_context_time:
+            assert exists(context_mapping_features)
+            self.to_time = nn.Sequential(
+                TimePositionalEmbedding(
+                    dim=channels, out_features=context_mapping_features
+                ),
+                nn.GELU(),
+            )
+        if use_context_features:
+            assert exists(context_features) and exists(context_mapping_features)
+            self.to_features = nn.Sequential(
+                nn.Linear(
+                    in_features=context_features, out_features=context_mapping_features
+                ),
+                nn.GELU(),
+            )
+        self.fixed_embedding = FixedEmbedding(
+            max_length=embedding_max_length, features=context_embedding_features
+        )
+    def get_mapping(
+        self, time: Optional[Tensor] = None, features: Optional[Tensor] = None
+    ) -> Optional[Tensor]:
+        """Combines context time features and features into mapping"""
+        items, mapping = [], None
+        # Compute time features
+        if self.use_context_time:
+            assert_message = "use_context_time=True but no time features provided"
+            assert exists(time), assert_message
+            items += [self.to_time(time)]
+        # Compute features
+        if self.use_context_features:
+            assert_message = "context_features exists but no features provided"
+            assert exists(features), assert_message
+            items += [self.to_features(features)]
+        # Compute joint mapping
+        if self.use_context_time or self.use_context_features:
+            mapping = reduce(torch.stack(items), "n b m -> b m", "sum")
+            mapping = self.to_mapping(mapping)
+        return mapping
+    def run(self, x, time, embedding, features):
+        mapping = self.get_mapping(time, features)
+        x = torch.cat([x.expand(-1, embedding.size(1), -1), embedding], axis=-1)
+        mapping = mapping.unsqueeze(1).expand(-1, embedding.size(1), -1)
+        for block in self.blocks:
+            x = x + mapping
+            x = block(x, features)
+        x = x.mean(axis=1).unsqueeze(1)
+        x = self.to_out(x)
+        x = x.transpose(-1, -2)
+        return x
+    def forward(self, x: Tensor,
+                time: Tensor,
+                embedding_mask_proba: float = 0.0,
+                embedding: Optional[Tensor] = None,
+                features: Optional[Tensor] = None,
+               embedding_scale: float = 1.0) -> Tensor:
+        b, device = embedding.shape[0], embedding.device
+        fixed_embedding = self.fixed_embedding(embedding)
+        if embedding_mask_proba > 0.0:
+            # Randomly mask embedding
+            batch_mask = rand_bool(
+                shape=(b, 1, 1), proba=embedding_mask_proba, device=device
+            )
+            embedding = torch.where(batch_mask, fixed_embedding, embedding)
+        if embedding_scale != 1.0:
+            # Compute both normal and fixed embedding outputs
+            out = self.run(x, time, embedding=embedding, features=features)
+            out_masked = self.run(x, time, embedding=fixed_embedding, features=features)
+            # Scale conditional output using classifier-free guidance
+            return out_masked + (out - out_masked) * embedding_scale
+        else:
+            return self.run(x, time, embedding=embedding, features=features)
+        return x
+class StyleTransformerBlock(nn.Module):
+    def __init__(
+        self,
+        features: int,
+        num_heads: int,
+        head_features: int,
+        style_dim: int,
+        multiplier: int,
+        use_rel_pos: bool,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+        context_features: Optional[int] = None,
+    ):
+        super().__init__()
+        self.use_cross_attention = exists(context_features) and context_features > 0
+        self.attention = StyleAttention(
+            features=features,
+            style_dim=style_dim,
+            num_heads=num_heads,
+            head_features=head_features,
+            use_rel_pos=use_rel_pos,
+            rel_pos_num_buckets=rel_pos_num_buckets,
+            rel_pos_max_distance=rel_pos_max_distance,
+        )
+        if self.use_cross_attention:
+            self.cross_attention = StyleAttention(
+                features=features,
+                style_dim=style_dim,
+                num_heads=num_heads,
+                head_features=head_features,
+                context_features=context_features,
+                use_rel_pos=use_rel_pos,
+                rel_pos_num_buckets=rel_pos_num_buckets,
+                rel_pos_max_distance=rel_pos_max_distance,
+            )
+        self.feed_forward = FeedForward(features=features, multiplier=multiplier)
+    def forward(self, x: Tensor, s: Tensor, *, context: Optional[Tensor] = None) -> Tensor:
+        x = self.attention(x, s) + x
+        if self.use_cross_attention:
+            x = self.cross_attention(x, s, context=context) + x
+        x = self.feed_forward(x) + x
+        return x
+class StyleAttention(nn.Module):
+    def __init__(
+        self,
+        features: int,
+        *,
+        style_dim: int,
+        head_features: int,
+        num_heads: int,
+        context_features: Optional[int] = None,
+        use_rel_pos: bool,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+    ):
+        super().__init__()
+        self.context_features = context_features
+        mid_features = head_features * num_heads
+        context_features = default(context_features, features)
+        self.norm = AdaLayerNorm(style_dim, features)
+        self.norm_context = AdaLayerNorm(style_dim, context_features)
+        self.to_q = nn.Linear(
+            in_features=features, out_features=mid_features, bias=False
+        )
+        self.to_kv = nn.Linear(
+            in_features=context_features, out_features=mid_features * 2, bias=False
+        )
+        self.attention = AttentionBase(
+            features,
+            num_heads=num_heads,
+            head_features=head_features,
+            use_rel_pos=use_rel_pos,
+            rel_pos_num_buckets=rel_pos_num_buckets,
+            rel_pos_max_distance=rel_pos_max_distance,
+        )
+    def forward(self, x: Tensor, s: Tensor, *, context: Optional[Tensor] = None) -> Tensor:
+        assert_message = "You must provide a context when using context_features"
+        assert not self.context_features or exists(context), assert_message
+        # Use context if provided
+        context = default(context, x)
+        # Normalize then compute q from input and k,v from context
+        x, context = self.norm(x, s), self.norm_context(context, s)
+        q, k, v = (self.to_q(x), *torch.chunk(self.to_kv(context), chunks=2, dim=-1))
+        # Compute and return attention
+        return self.attention(q, k, v)
+class Transformer1d(nn.Module):
+    def __init__(
+        self,
+        num_layers: int,
+        channels: int,
+        num_heads: int,
+        head_features: int,
+        multiplier: int,
+        use_context_time: bool = True,
+        use_rel_pos: bool = False,
+        context_features_multiplier: int = 1,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+        context_features: Optional[int] = None,
+        context_embedding_features: Optional[int] = None,
+        embedding_max_length: int = 512,
+    ):
+        super().__init__()
+        self.blocks = nn.ModuleList(
+            [
+                TransformerBlock(
+                    features=channels + context_embedding_features,
+                    head_features=head_features,
+                    num_heads=num_heads,
+                    multiplier=multiplier,
+                    use_rel_pos=use_rel_pos,
+                    rel_pos_num_buckets=rel_pos_num_buckets,
+                    rel_pos_max_distance=rel_pos_max_distance,
+                )
+                for i in range(num_layers)
+            ]
+        )
+        self.to_out = nn.Sequential(
+            Rearrange("b t c -> b c t"),
+            nn.Conv1d(
+                in_channels=channels + context_embedding_features,
+                out_channels=channels,
+                kernel_size=1,
+            ),
+        )
+        use_context_features = exists(context_features)
+        self.use_context_features = use_context_features
+        self.use_context_time = use_context_time
+        if use_context_time or use_context_features:
+            context_mapping_features = channels + context_embedding_features
+            self.to_mapping = nn.Sequential(
+                nn.Linear(context_mapping_features, context_mapping_features),
+                nn.GELU(),
+                nn.Linear(context_mapping_features, context_mapping_features),
+                nn.GELU(),
+            )
+        if use_context_time:
+            assert exists(context_mapping_features)
+            self.to_time = nn.Sequential(
+                TimePositionalEmbedding(
+                    dim=channels, out_features=context_mapping_features
+                ),
+                nn.GELU(),
+            )
+        if use_context_features:
+            assert exists(context_features) and exists(context_mapping_features)
+            self.to_features = nn.Sequential(
+                nn.Linear(
+                    in_features=context_features, out_features=context_mapping_features
+                ),
+                nn.GELU(),
+            )
+        self.fixed_embedding = FixedEmbedding(
+            max_length=embedding_max_length, features=context_embedding_features
+        )
+    def get_mapping(
+        self, time: Optional[Tensor] = None, features: Optional[Tensor] = None
+    ) -> Optional[Tensor]:
+        """Combines context time features and features into mapping"""
+        items, mapping = [], None
+        # Compute time features
+        if self.use_context_time:
+            assert_message = "use_context_time=True but no time features provided"
+            assert exists(time), assert_message
+            items += [self.to_time(time)]
+        # Compute features
+        if self.use_context_features:
+            assert_message = "context_features exists but no features provided"
+            assert exists(features), assert_message
+            items += [self.to_features(features)]
+        # Compute joint mapping
+        if self.use_context_time or self.use_context_features:
+            mapping = reduce(torch.stack(items), "n b m -> b m", "sum")
+            mapping = self.to_mapping(mapping)
+        return mapping
+    def run(self, x, time, embedding, features):
+        mapping = self.get_mapping(time, features)
+        x = torch.cat([x.expand(-1, embedding.size(1), -1), embedding], axis=-1)
+        mapping = mapping.unsqueeze(1).expand(-1, embedding.size(1), -1)
+        for block in self.blocks:
+            x = x + mapping
+            x = block(x)
+        x = x.mean(axis=1).unsqueeze(1)
+        x = self.to_out(x)
+        x = x.transpose(-1, -2)
+        return x
+    def forward(self, x: Tensor,
+                time: Tensor,
+                embedding_mask_proba: float = 0.0,
+                embedding: Optional[Tensor] = None,
+                features: Optional[Tensor] = None,
+               embedding_scale: float = 1.0) -> Tensor:
+        b, device = embedding.shape[0], embedding.device
+        fixed_embedding = self.fixed_embedding(embedding)
+        if embedding_mask_proba > 0.0:
+            # Randomly mask embedding
+            batch_mask = rand_bool(
+                shape=(b, 1, 1), proba=embedding_mask_proba, device=device
+            )
+            embedding = torch.where(batch_mask, fixed_embedding, embedding)
+        if embedding_scale != 1.0:
+            # Compute both normal and fixed embedding outputs
+            out = self.run(x, time, embedding=embedding, features=features)
+            out_masked = self.run(x, time, embedding=fixed_embedding, features=features)
+            # Scale conditional output using classifier-free guidance
+            return out_masked + (out - out_masked) * embedding_scale
+        else:
+            return self.run(x, time, embedding=embedding, features=features)
+        return x
+"""
+Attention Components
+"""
+class RelativePositionBias(nn.Module):
+    def __init__(self, num_buckets: int, max_distance: int, num_heads: int):
+        super().__init__()
+        self.num_buckets = num_buckets
+        self.max_distance = max_distance
+        self.num_heads = num_heads
+        self.relative_attention_bias = nn.Embedding(num_buckets, num_heads)
+    @staticmethod
+    def _relative_position_bucket(
+        relative_position: Tensor, num_buckets: int, max_distance: int
+    ):
+        num_buckets //= 2
+        ret = (relative_position >= 0).to(torch.long) * num_buckets
+        n = torch.abs(relative_position)
+        max_exact = num_buckets // 2
+        is_small = n < max_exact
+        val_if_large = (
+            max_exact
+            + (
+                torch.log(n.float() / max_exact)
+                / log(max_distance / max_exact)
+                * (num_buckets - max_exact)
+            ).long()
+        )
+        val_if_large = torch.min(
+            val_if_large, torch.full_like(val_if_large, num_buckets - 1)
+        )
+        ret += torch.where(is_small, n, val_if_large)
+        return ret
+    def forward(self, num_queries: int, num_keys: int) -> Tensor:
+        i, j, device = num_queries, num_keys, self.relative_attention_bias.weight.device
+        q_pos = torch.arange(j - i, j, dtype=torch.long, device=device)
+        k_pos = torch.arange(j, dtype=torch.long, device=device)
+        rel_pos = rearrange(k_pos, "j -> 1 j") - rearrange(q_pos, "i -> i 1")
+        relative_position_bucket = self._relative_position_bucket(
+            rel_pos, num_buckets=self.num_buckets, max_distance=self.max_distance
+        )
+        bias = self.relative_attention_bias(relative_position_bucket)
+        bias = rearrange(bias, "m n h -> 1 h m n")
+        return bias
+def FeedForward(features: int, multiplier: int) -> nn.Module:
+    mid_features = features * multiplier
+    return nn.Sequential(
+        nn.Linear(in_features=features, out_features=mid_features),
+        nn.GELU(),
+        nn.Linear(in_features=mid_features, out_features=features),
+    )
+class AttentionBase(nn.Module):
+    def __init__(
+        self,
+        features: int,
+        *,
+        head_features: int,
+        num_heads: int,
+        use_rel_pos: bool,
+        out_features: Optional[int] = None,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+    ):
+        super().__init__()
+        self.scale = head_features ** -0.5
+        self.num_heads = num_heads
+        self.use_rel_pos = use_rel_pos
+        mid_features = head_features * num_heads
+        if use_rel_pos:
+            assert exists(rel_pos_num_buckets) and exists(rel_pos_max_distance)
+            self.rel_pos = RelativePositionBias(
+                num_buckets=rel_pos_num_buckets,
+                max_distance=rel_pos_max_distance,
+                num_heads=num_heads,
+            )
+        if out_features is None:
+            out_features = features
+        self.to_out = nn.Linear(in_features=mid_features, out_features=out_features)
+    def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
+        # Split heads
+        q, k, v = rearrange_many((q, k, v), "b n (h d) -> b h n d", h=self.num_heads)
+        # Compute similarity matrix
+        sim = einsum("... n d, ... m d -> ... n m", q, k)
+        sim = (sim + self.rel_pos(*sim.shape[-2:])) if self.use_rel_pos else sim
+        sim = sim * self.scale
+        # Get attention matrix with softmax
+        attn = sim.softmax(dim=-1)
+        # Compute values
+        out = einsum("... n m, ... m d -> ... n d", attn, v)
+        out = rearrange(out, "b h n d -> b n (h d)")
+        return self.to_out(out)
+class Attention(nn.Module):
+    def __init__(
+        self,
+        features: int,
+        *,
+        head_features: int,
+        num_heads: int,
+        out_features: Optional[int] = None,
+        context_features: Optional[int] = None,
+        use_rel_pos: bool,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+    ):
+        super().__init__()
+        self.context_features = context_features
+        mid_features = head_features * num_heads
+        context_features = default(context_features, features)
+        self.norm = nn.LayerNorm(features)
+        self.norm_context = nn.LayerNorm(context_features)
+        self.to_q = nn.Linear(
+            in_features=features, out_features=mid_features, bias=False
+        )
+        self.to_kv = nn.Linear(
+            in_features=context_features, out_features=mid_features * 2, bias=False
+        )
+        self.attention = AttentionBase(
+            features,
+            out_features=out_features,
+            num_heads=num_heads,
+            head_features=head_features,
+            use_rel_pos=use_rel_pos,
+            rel_pos_num_buckets=rel_pos_num_buckets,
+            rel_pos_max_distance=rel_pos_max_distance,
+        )
+    def forward(self, x: Tensor, *, context: Optional[Tensor] = None) -> Tensor:
+        assert_message = "You must provide a context when using context_features"
+        assert not self.context_features or exists(context), assert_message
+        # Use context if provided
+        context = default(context, x)
+        # Normalize then compute q from input and k,v from context
+        x, context = self.norm(x), self.norm_context(context)
+        q, k, v = (self.to_q(x), *torch.chunk(self.to_kv(context), chunks=2, dim=-1))
+        # Compute and return attention
+        return self.attention(q, k, v)
+"""
+Transformer Blocks
+"""
+class TransformerBlock(nn.Module):
+    def __init__(
+        self,
+        features: int,
+        num_heads: int,
+        head_features: int,
+        multiplier: int,
+        use_rel_pos: bool,
+        rel_pos_num_buckets: Optional[int] = None,
+        rel_pos_max_distance: Optional[int] = None,
+        context_features: Optional[int] = None,
+    ):
+        super().__init__()
+        self.use_cross_attention = exists(context_features) and context_features > 0
+        self.attention = Attention(
+            features=features,
+            num_heads=num_heads,
+            head_features=head_features,
+            use_rel_pos=use_rel_pos,
+            rel_pos_num_buckets=rel_pos_num_buckets,
+            rel_pos_max_distance=rel_pos_max_distance,
+        )
+        if self.use_cross_attention:
+            self.cross_attention = Attention(
+                features=features,
+                num_heads=num_heads,
+                head_features=head_features,
+                context_features=context_features,
+                use_rel_pos=use_rel_pos,
+                rel_pos_num_buckets=rel_pos_num_buckets,
+                rel_pos_max_distance=rel_pos_max_distance,
+            )
+        self.feed_forward = FeedForward(features=features, multiplier=multiplier)
+    def forward(self, x: Tensor, *, context: Optional[Tensor] = None) -> Tensor:
+        x = self.attention(x) + x
+        if self.use_cross_attention:
+            x = self.cross_attention(x, context=context) + x
+        x = self.feed_forward(x) + x
+        return x
+"""
+Time Embeddings
+"""
+class SinusoidalEmbedding(nn.Module):
+    def __init__(self, dim: int):
+        super().__init__()
+        self.dim = dim
+    def forward(self, x: Tensor) -> Tensor:
+        device, half_dim = x.device, self.dim // 2
+        emb = torch.tensor(log(10000) / (half_dim - 1), device=device)
+        emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
+        emb = rearrange(x, "i -> i 1") * rearrange(emb, "j -> 1 j")
+        return torch.cat((emb.sin(), emb.cos()), dim=-1)
+class LearnedPositionalEmbedding(nn.Module):
+    """Used for continuous time"""
+    def __init__(self, dim: int):
+        super().__init__()
+        assert (dim % 2) == 0
+        half_dim = dim // 2
+        self.weights = nn.Parameter(torch.randn(half_dim))
+    def forward(self, x: Tensor) -> Tensor:
+        x = rearrange(x, "b -> b 1")
+        freqs = x * rearrange(self.weights, "d -> 1 d") * 2 * pi
+        fouriered = torch.cat((freqs.sin(), freqs.cos()), dim=-1)
+        fouriered = torch.cat((x, fouriered), dim=-1)
+        return fouriered
+def TimePositionalEmbedding(dim: int, out_features: int) -> nn.Module:
+    return nn.Sequential(
+        LearnedPositionalEmbedding(dim),
+        nn.Linear(in_features=dim + 1, out_features=out_features),
+    )
+class FixedEmbedding(nn.Module):
+    def __init__(self, max_length: int, features: int):
+        super().__init__()
+        self.max_length = max_length
+        self.embedding = nn.Embedding(max_length, features)
+    def forward(self, x: Tensor) -> Tensor:
+        batch_size, length, device = *x.shape[0:2], x.device
+        assert_message = "Input sequence length must be <= max_length"
+        assert length <= self.max_length, assert_message
+        position = torch.arange(length, device=device)
+        fixed_embedding = self.embedding(position)
+        fixed_embedding = repeat(fixed_embedding, "n d -> b n d", b=batch_size)
+        return fixed_embedding

Modules/diffusion/sampler.py ADDED Viewed

	@@ -0,0 +1,691 @@

+from math import atan, cos, pi, sin, sqrt
+from typing import Any, Callable, List, Optional, Tuple, Type
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange, reduce
+from torch import Tensor
+from .utils import *
+"""
+Diffusion Training
+"""
+""" Distributions """
+class Distribution:
+    def __call__(self, num_samples: int, device: torch.device):
+        raise NotImplementedError()
+class LogNormalDistribution(Distribution):
+    def __init__(self, mean: float, std: float):
+        self.mean = mean
+        self.std = std
+    def __call__(
+        self, num_samples: int, device: torch.device = torch.device("cpu")
+    ) -> Tensor:
+        normal = self.mean + self.std * torch.randn((num_samples,), device=device)
+        return normal.exp()
+class UniformDistribution(Distribution):
+    def __call__(self, num_samples: int, device: torch.device = torch.device("cpu")):
+        return torch.rand(num_samples, device=device)
+class VKDistribution(Distribution):
+    def __init__(
+        self,
+        min_value: float = 0.0,
+        max_value: float = float("inf"),
+        sigma_data: float = 1.0,
+    ):
+        self.min_value = min_value
+        self.max_value = max_value
+        self.sigma_data = sigma_data
+    def __call__(
+        self, num_samples: int, device: torch.device = torch.device("cpu")
+    ) -> Tensor:
+        sigma_data = self.sigma_data
+        min_cdf = atan(self.min_value / sigma_data) * 2 / pi
+        max_cdf = atan(self.max_value / sigma_data) * 2 / pi
+        u = (max_cdf - min_cdf) * torch.randn((num_samples,), device=device) + min_cdf
+        return torch.tan(u * pi / 2) * sigma_data
+""" Diffusion Classes """
+def pad_dims(x: Tensor, ndim: int) -> Tensor:
+    # Pads additional ndims to the right of the tensor
+    return x.view(*x.shape, *((1,) * ndim))
+def clip(x: Tensor, dynamic_threshold: float = 0.0):
+    if dynamic_threshold == 0.0:
+        return x.clamp(-1.0, 1.0)
+    else:
+        # Dynamic thresholding
+        # Find dynamic threshold quantile for each batch
+        x_flat = rearrange(x, "b ... -> b (...)")
+        scale = torch.quantile(x_flat.abs(), dynamic_threshold, dim=-1)
+        # Clamp to a min of 1.0
+        scale.clamp_(min=1.0)
+        # Clamp all values and scale
+        scale = pad_dims(scale, ndim=x.ndim - scale.ndim)
+        x = x.clamp(-scale, scale) / scale
+        return x
def to_batch(
    batch_size: int,
    device: torch.device,
    x: Optional[float] = None,
    xs: Optional[Tensor] = None,
) -> Tensor:
    """Return one value per batch item from either a scalar or a batched tensor.

    Exactly one of `x` and `xs` must be given. A scalar `x` is repeated
    `batch_size` times in a new tensor on `device`; a given `xs` is returned
    unchanged (it is not moved to `device`).

    Raises:
        AssertionError: if both or neither of `x` and `xs` are provided.
    """
    assert exists(x) ^ exists(xs), "Either x or xs must be provided"
    # If x provided use the same for all batch items
    if exists(x):
        # Allocate on the target device directly instead of filling on the
        # default device and copying afterwards.
        xs = torch.full(size=(batch_size,), fill_value=x, device=device)
    assert exists(xs)
    return xs
class Diffusion(nn.Module):
    """Base diffusion class.

    Subclasses set `alias` to a short identifier and implement `denoise_fn`
    and `forward`; both raise NotImplementedError here.
    """
    # Short identifier of the diffusion variant; empty on the base class.
    alias: str = ""
    def denoise_fn(
        self,
        x_noisy: Tensor,
        sigmas: Optional[Tensor] = None,
        sigma: Optional[float] = None,
        **kwargs,
    ) -> Tensor:
        """Denoise `x_noisy` at the given noise level(s); must be overridden."""
        raise NotImplementedError("Diffusion class missing denoise_fn")
    def forward(self, x: Tensor, noise: Optional[Tensor] = None, **kwargs) -> Tensor:
        """Compute the training loss for `x`; must be overridden."""
        raise NotImplementedError("Diffusion class missing forward function")
class VDiffusion(Diffusion):
    """Diffusion that mixes data and noise with cos/sin weights of ``sigma * pi / 2``.

    The wrapped network is trained to predict ``noise * alpha - x * beta``.
    """
    alias = "v"
    def __init__(self, net: nn.Module, *, sigma_distribution: Distribution):
        super().__init__()
        self.net = net
        self.sigma_distribution = sigma_distribution
    def get_alpha_beta(self, sigmas: Tensor) -> Tuple[Tensor, Tensor]:
        """Return ``(cos(angle), sin(angle))`` for ``angle = sigmas * pi / 2``."""
        angle = sigmas * pi / 2
        return torch.cos(angle), torch.sin(angle)
    def denoise_fn(
        self,
        x_noisy: Tensor,
        sigmas: Optional[Tensor] = None,
        sigma: Optional[float] = None,
        **kwargs,
    ) -> Tensor:
        """Run the network on `x_noisy` with one sigma per batch item."""
        per_item_sigmas = to_batch(
            x=sigma, xs=sigmas, batch_size=x_noisy.shape[0], device=x_noisy.device
        )
        return self.net(x_noisy, per_item_sigmas, **kwargs)
    def forward(self, x: Tensor, noise: Tensor = None, **kwargs) -> Tensor:
        """Return the MSE between the network output and the target for a random noise level."""
        # One noise level per batch element
        sigmas = self.sigma_distribution(num_samples=x.shape[0], device=x.device)
        # Use the caller's noise if given, otherwise draw standard normal noise
        noise = default(noise, lambda: torch.randn_like(x))
        # Mixing weights, reshaped to (b, 1, 1) so they broadcast over x
        alpha, beta = self.get_alpha_beta(rearrange(sigmas, "b -> b 1 1"))
        x_noisy = x * alpha + noise * beta
        x_target = noise * alpha - x * beta
        # Denoise and return loss
        prediction = self.denoise_fn(x_noisy, sigmas, **kwargs)
        return F.mse_loss(prediction, x_target)
class KDiffusion(Diffusion):
    """Elucidated Diffusion (Karras et al. 2022): https://arxiv.org/abs/2206.00364"""
    alias = "k"
    def __init__(
        self,
        net: nn.Module,
        *,
        sigma_distribution: Distribution,
        sigma_data: float,  # data distribution standard deviation
        dynamic_threshold: float = 0.0,
    ):
        super().__init__()
        self.net = net
        self.sigma_data = sigma_data
        self.sigma_distribution = sigma_distribution
        self.dynamic_threshold = dynamic_threshold
    def get_scale_weights(self, sigmas: Tensor) -> Tuple[Tensor, ...]:
        """Return the scaling terms ``(c_skip, c_out, c_in, c_noise)`` for `sigmas`.

        `c_noise` keeps the flat shape of `sigmas`; the other three are
        computed from `sigmas` reshaped to (b, 1, 1).
        """
        data_std = self.sigma_data
        c_noise = torch.log(sigmas) * 0.25
        sigmas = rearrange(sigmas, "b -> b 1 1")
        total_variance = sigmas ** 2 + data_std ** 2
        inv_total_std = total_variance ** -0.5
        c_skip = (data_std ** 2) / total_variance
        c_out = sigmas * data_std * inv_total_std
        c_in = inv_total_std
        return c_skip, c_out, c_in, c_noise
    def denoise_fn(
        self,
        x_noisy: Tensor,
        sigmas: Optional[Tensor] = None,
        sigma: Optional[float] = None,
        **kwargs,
    ) -> Tensor:
        """Return the denoised estimate: a skip connection plus the scaled network output."""
        per_item_sigmas = to_batch(
            x=sigma, xs=sigmas, batch_size=x_noisy.shape[0], device=x_noisy.device
        )
        c_skip, c_out, c_in, c_noise = self.get_scale_weights(per_item_sigmas)
        network_out = self.net(c_in * x_noisy, c_noise, **kwargs)
        return c_skip * x_noisy + c_out * network_out
    def loss_weight(self, sigmas: Tensor) -> Tensor:
        """Return the per-item loss weight computed from `sigmas` and `sigma_data`."""
        return (sigmas ** 2 + self.sigma_data ** 2) * (sigmas * self.sigma_data) ** -2
    def forward(self, x: Tensor, noise: Tensor = None, **kwargs) -> Tensor:
        """Return the weighted denoising loss for `x` at randomly drawn noise levels."""
        batch_size, device = x.shape[0], x.device
        # Local import so that `reduce` is einops' reduce inside this method
        from einops import rearrange, reduce
        # One noise level per batch element
        sigmas = self.sigma_distribution(num_samples=batch_size, device=device)
        # Use the caller's noise if given, otherwise draw standard normal noise
        noise = default(noise, lambda: torch.randn_like(x))
        x_noisy = x + rearrange(sigmas, "b -> b 1 1") * noise
        x_denoised = self.denoise_fn(x_noisy, sigmas=sigmas, **kwargs)
        # Mean squared error per batch item, then weighted by noise level
        per_item = reduce(F.mse_loss(x_denoised, x, reduction="none"), "b ... -> b", "mean")
        per_item = per_item * self.loss_weight(sigmas)
        return per_item.mean()
class VKDiffusion(Diffusion):
    """Diffusion with additive noise whose network predicts a v-objective target.

    The network receives ``sigma_to_t(sigmas)`` as its noise conditioning.
    """
    alias = "vk"
    def __init__(self, net: nn.Module, *, sigma_distribution: Distribution):
        super().__init__()
        self.net = net
        self.sigma_distribution = sigma_distribution
    def get_scale_weights(self, sigmas: Tensor) -> Tuple[Tensor, ...]:
        """Return ``(c_skip, c_out, c_in)`` for `sigmas`, using a data std of 1.0."""
        sigma_data = 1.0
        sigmas = rearrange(sigmas, "b -> b 1 1")
        total_variance = sigmas ** 2 + sigma_data ** 2
        inv_total_std = total_variance ** -0.5
        c_skip = (sigma_data ** 2) / total_variance
        c_out = -sigmas * sigma_data * inv_total_std
        c_in = inv_total_std
        return c_skip, c_out, c_in
    def sigma_to_t(self, sigmas: Tensor) -> Tensor:
        """Map sigmas to ``atan(sigma) * 2 / pi``."""
        return sigmas.atan() / pi * 2
    def t_to_sigma(self, t: Tensor) -> Tensor:
        """Inverse of `sigma_to_t`: ``tan(t * pi / 2)``."""
        return (t * pi / 2).tan()
    def denoise_fn(
        self,
        x_noisy: Tensor,
        sigmas: Optional[Tensor] = None,
        sigma: Optional[float] = None,
        **kwargs,
    ) -> Tensor:
        """Return the denoised estimate: a skip connection plus the scaled network output."""
        per_item_sigmas = to_batch(
            x=sigma, xs=sigmas, batch_size=x_noisy.shape[0], device=x_noisy.device
        )
        c_skip, c_out, c_in = self.get_scale_weights(per_item_sigmas)
        network_out = self.net(c_in * x_noisy, self.sigma_to_t(per_item_sigmas), **kwargs)
        return c_skip * x_noisy + c_out * network_out
    def forward(self, x: Tensor, noise: Tensor = None, **kwargs) -> Tensor:
        """Return the MSE between the network output and the v-objective target."""
        # One noise level per batch element
        sigmas = self.sigma_distribution(num_samples=x.shape[0], device=x.device)
        # Use the caller's noise if given, otherwise draw standard normal noise
        noise = default(noise, lambda: torch.randn_like(x))
        x_noisy = x + rearrange(sigmas, "b -> b 1 1") * noise
        # Network output on the rescaled noisy input
        c_skip, c_out, c_in = self.get_scale_weights(sigmas)
        network_out = self.net(c_in * x_noisy, self.sigma_to_t(sigmas), **kwargs)
        # Target obtained by solving the denoising equation for the network output
        v_target = (x - c_skip * x_noisy) / (c_out + 1e-7)
        return F.mse_loss(network_out, v_target)
+"""
+Diffusion Sampling
+"""
+""" Schedules """
class Schedule(nn.Module):
    """Interface shared by the sampling schedules.

    A schedule maps a number of steps and a device to a tensor of sigmas.
    """
    def forward(self, num_steps: int, device: torch.device) -> Tensor:
        """Return the sigmas for `num_steps` steps; subclasses must override."""
        raise NotImplementedError()
class LinearSchedule(Schedule):
    """Schedule of `num_steps` evenly spaced sigmas starting at 1 and excluding 0."""
    def forward(self, num_steps: int, device: Any) -> Tensor:
        """Return a 1-D tensor of `num_steps` sigmas on `device`."""
        # Previously `device` was ignored and the sigmas always lived on the
        # default device. linspace covers num_steps + 1 points; the final 0.0
        # is dropped.
        sigmas = torch.linspace(1, 0, num_steps + 1, device=device)[:-1]
        return sigmas
class KarrasSchedule(Schedule):
    """https://arxiv.org/abs/2206.00364 equation 5

    Sigmas are interpolated between `sigma_max` and `sigma_min` in
    ``sigma ** (1 / rho)`` space and a final 0.0 is appended.
    """
    def __init__(self, sigma_min: float, sigma_max: float, rho: float = 7.0):
        super().__init__()
        self.sigma_min = sigma_min
        self.sigma_max = sigma_max
        self.rho = rho
    def forward(self, num_steps: int, device: Any) -> Tensor:
        """Return ``num_steps + 1`` sigmas on `device`, the last one being 0.0."""
        rho_inv = 1.0 / self.rho
        steps = torch.arange(num_steps, device=device, dtype=torch.float32)
        # With a single step the original divisor (num_steps - 1) is zero and
        # 0 / 0 produced NaN; the guarded divisor yields [sigma_max, 0.0].
        last_index = max(num_steps - 1, 1)
        sigmas = (
            self.sigma_max ** rho_inv
            + (steps / last_index)
            * (self.sigma_min ** rho_inv - self.sigma_max ** rho_inv)
        ) ** self.rho
        sigmas = F.pad(sigmas, pad=(0, 1), value=0.0)
        return sigmas
+""" Samplers """
class Sampler(nn.Module):
    """Interface shared by the samplers.

    `diffusion_types` lists the diffusion classes a sampler may be paired
    with; both entry points raise NotImplementedError on this base class.
    """
    diffusion_types: List[Type[Diffusion]] = []
    def forward(
        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
    ) -> Tensor:
        """Turn `noise` into a sample using denoiser `fn`; subclasses must override."""
        raise NotImplementedError()
    def inpaint(
        self,
        source: Tensor,
        mask: Tensor,
        fn: Callable,
        sigmas: Tensor,
        num_steps: int,
        num_resamples: int,
    ) -> Tensor:
        """Inpainting entry point; samplers that support it override this."""
        raise NotImplementedError("Inpainting not available with current sampler")
class VSampler(Sampler):
    """Sampler for `VDiffusion` that steps through the sigmas with cos/sin weights."""
    diffusion_types = [VDiffusion]
    def get_alpha_beta(self, sigma: float) -> Tuple[float, float]:
        """Return ``(cos(angle), sin(angle))`` for ``angle = sigma * pi / 2``."""
        angle = sigma * pi / 2
        alpha = cos(angle)
        beta = sin(angle)
        return alpha, beta
    def forward(
        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
    ) -> Tensor:
        """Run ``num_steps - 1`` denoising steps and return the last data estimate.

        Raises:
            ValueError: if `num_steps` is smaller than 2, because no step
                would run and there would be no estimate to return.
        """
        if num_steps < 2:
            raise ValueError("VSampler requires num_steps >= 2")
        x = sigmas[0] * noise
        alpha, beta = self.get_alpha_beta(sigmas[0].item())
        # The original loop carried an `is_last` flag that could never be True
        # inside range(num_steps - 1), so the re-noising below always ran.
        # The flag is removed; the computation is unchanged.
        for i in range(num_steps - 1):
            x_denoised = fn(x, sigma=sigmas[i])
            x_pred = x * alpha - x_denoised * beta
            x_eps = x * beta + x_denoised * alpha
            # Move the current estimate to the next noise level
            alpha, beta = self.get_alpha_beta(sigmas[i + 1].item())
            x = x_pred * alpha + x_eps * beta
        return x_pred
class KarrasSampler(Sampler):
    """https://arxiv.org/abs/2206.00364 algorithm 1

    Each step optionally raises the noise level (controlled by `s_churn`,
    `s_tmin`, `s_tmax`, `s_noise`), takes an Euler step, and applies a
    second-order correction.
    """
    diffusion_types = [KDiffusion, VKDiffusion]
    def __init__(
        self,
        s_tmin: float = 0,
        s_tmax: float = float("inf"),
        s_churn: float = 0.0,
        s_noise: float = 1.0,
    ):
        super().__init__()
        self.s_tmin = s_tmin
        self.s_tmax = s_tmax
        self.s_noise = s_noise
        self.s_churn = s_churn
    def step(
        self, x: Tensor, fn: Callable, sigma: float, sigma_next: float, gamma: float
    ) -> Tensor:
        """Algorithm 2 (step)"""
        # Select temporarily increased noise level
        sigma_hat = sigma + gamma * sigma
        # Add noise to move from sigma to sigma_hat
        epsilon = self.s_noise * torch.randn_like(x)
        x_hat = x + sqrt(sigma_hat ** 2 - sigma ** 2) * epsilon
        # Evaluate ∂x/∂sigma at sigma_hat
        d = (x_hat - fn(x_hat, sigma=sigma_hat)) / sigma_hat
        # Take euler step from sigma_hat to sigma_next
        x_next = x_hat + (sigma_next - sigma_hat) * d
        # Second order correction
        if sigma_next != 0:
            model_out_next = fn(x_next, sigma=sigma_next)
            d_prime = (x_next - model_out_next) / sigma_next
            # Average both slopes over the same interval as the Euler step
            # (sigma_hat -> sigma_next). The previous code used
            # (sigma - sigma_hat), which is zero when gamma == 0, so the
            # sampler never moved x.
            x_next = x_hat + 0.5 * (sigma_next - sigma_hat) * (d + d_prime)
        return x_next
    def forward(
        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
    ) -> Tensor:
        """Run ``num_steps - 1`` sampler steps starting from ``sigmas[0] * noise``."""
        x = sigmas[0] * noise
        # Compute gammas: noise is only increased for sigmas inside [s_tmin, s_tmax]
        gammas = torch.where(
            (sigmas >= self.s_tmin) & (sigmas <= self.s_tmax),
            min(self.s_churn / num_steps, sqrt(2) - 1),
            0.0,
        )
        # Denoise to sample
        for i in range(num_steps - 1):
            x = self.step(
                x, fn=fn, sigma=sigmas[i], sigma_next=sigmas[i + 1], gamma=gammas[i]  # type: ignore # noqa
            )
        return x
class AEulerSampler(Sampler):
    """Euler sampler that adds fresh noise after every step."""
    diffusion_types = [KDiffusion, VKDiffusion]
    def get_sigmas(self, sigma: float, sigma_next: float) -> Tuple[float, float]:
        """Split the move to `sigma_next` into ``(sigma_up, sigma_down)``.

        `sigma_down` is the level the Euler step goes to; `sigma_up` scales
        the noise added afterwards.
        """
        up = sqrt(sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2)
        down = sqrt(sigma_next ** 2 - up ** 2)
        return up, down
    def step(self, x: Tensor, fn: Callable, sigma: float, sigma_next: float) -> Tensor:
        """Advance `x` from `sigma` towards `sigma_next` by one step."""
        sigma_up, sigma_down = self.get_sigmas(sigma, sigma_next)
        # Derivative at sigma (∂x/∂sigma)
        slope = (x - fn(x, sigma=sigma)) / sigma
        # Euler step down to sigma_down
        x_down = x + slope * (sigma_down - sigma)
        # Add randomness scaled by sigma_up
        return x_down + torch.randn_like(x) * sigma_up
    def forward(
        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
    ) -> Tensor:
        """Run ``num_steps - 1`` steps starting from ``sigmas[0] * noise``."""
        x = sigmas[0] * noise
        for i in range(num_steps - 1):
            current, following = sigmas[i], sigmas[i + 1]
            x = self.step(x, fn=fn, sigma=current, sigma_next=following)  # type: ignore # noqa
        return x
class ADPM2Sampler(Sampler):
    """https://www.desmos.com/calculator/jbxjlqd9mb

    Two-evaluation sampler: the derivative taken at an intermediate sigma is
    used for the actual step, and fresh noise is added afterwards.
    """
    diffusion_types = [KDiffusion, VKDiffusion]
    def __init__(self, rho: float = 1.0):
        super().__init__()
        self.rho = rho
    def get_sigmas(self, sigma: float, sigma_next: float) -> Tuple[float, float, float]:
        """Return ``(sigma_up, sigma_down, sigma_mid)`` for one step.

        `sigma_mid` averages `sigma` and `sigma_down` in ``sigma ** (1 / rho)``
        space.
        """
        inv_rho = 1 / self.rho
        up = sqrt(sigma_next ** 2 * (sigma ** 2 - sigma_next ** 2) / sigma ** 2)
        down = sqrt(sigma_next ** 2 - up ** 2)
        mid = ((sigma ** inv_rho + down ** inv_rho) / 2) ** self.rho
        return up, down, mid
    def step(self, x: Tensor, fn: Callable, sigma: float, sigma_next: float) -> Tensor:
        """Advance `x` from `sigma` towards `sigma_next` by one step."""
        sigma_up, sigma_down, sigma_mid = self.get_sigmas(sigma, sigma_next)
        # Derivative at sigma (∂x/∂sigma)
        slope = (x - fn(x, sigma=sigma)) / sigma
        # Denoise to midpoint
        x_mid = x + slope * (sigma_mid - sigma)
        # Derivative at sigma_mid (∂x_mid/∂sigma_mid)
        slope_mid = (x_mid - fn(x_mid, sigma=sigma_mid)) / sigma_mid
        # Full step from the original x using the midpoint derivative
        x_down = x + slope_mid * (sigma_down - sigma)
        # Add randomness scaled by sigma_up
        return x_down + torch.randn_like(x_down) * sigma_up
    def forward(
        self, noise: Tensor, fn: Callable, sigmas: Tensor, num_steps: int
    ) -> Tensor:
        """Run ``num_steps - 1`` steps starting from ``sigmas[0] * noise``."""
        x = sigmas[0] * noise
        for i in range(num_steps - 1):
            current, following = sigmas[i], sigmas[i + 1]
            x = self.step(x, fn=fn, sigma=current, sigma_next=following)  # type: ignore # noqa
        return x
    def inpaint(
        self,
        source: Tensor,
        mask: Tensor,
        fn: Callable,
        sigmas: Tensor,
        num_steps: int,
        num_resamples: int,
    ) -> Tensor:
        """Generate the masked-out part of `source`, keeping the part where `mask` is set.

        `mask` is inverted with ``~``; at every noise level the step is
        repeated `num_resamples` times with re-noising in between.
        """
        x = sigmas[0] * torch.randn_like(source)
        for i in range(num_steps - 1):
            current, following = sigmas[i], sigmas[i + 1]
            # Noise source to current noise level
            source_noisy = source + current * torch.randn_like(source)
            for resample in range(num_resamples):
                # Merge noisy source and current then denoise
                x = source_noisy * mask + x * ~mask
                x = self.step(x, fn=fn, sigma=current, sigma_next=following)  # type: ignore # noqa
                # Renoise if not last resample step
                if resample < num_resamples - 1:
                    renoise_sigma = sqrt(current ** 2 - following ** 2)
                    x = x + renoise_sigma * torch.randn_like(x)
        return source * mask + x * ~mask
+""" Main Classes """
class DiffusionSampler(nn.Module):
    """Combines a diffusion model's denoiser with a sampler and a sigma schedule."""
    def __init__(
        self,
        diffusion: Diffusion,
        *,
        sampler: Sampler,
        sigma_schedule: Schedule,
        num_steps: Optional[int] = None,
        clamp: bool = True,
    ):
        super().__init__()
        self.denoise_fn = diffusion.denoise_fn
        self.sampler = sampler
        self.sigma_schedule = sigma_schedule
        self.num_steps = num_steps
        self.clamp = clamp
        # Check sampler is compatible with diffusion type (compared by alias)
        message = f"{sampler.__class__.__name__} incompatible with {diffusion.__class__.__name__}"
        supported_aliases = [t.alias for t in sampler.diffusion_types]
        assert diffusion.alias in supported_aliases, message
    def forward(
        self, noise: Tensor, num_steps: Optional[int] = None, **kwargs
    ) -> Tensor:
        """Sample from `noise`; extra keyword arguments are forwarded to the denoiser.

        `num_steps` falls back to the value given at construction. The result
        is clamped to [-1, 1] when `clamp` is enabled.
        """
        device = noise.device
        num_steps = default(num_steps, self.num_steps)  # type: ignore
        assert exists(num_steps), "Parameter `num_steps` must be provided"
        # Compute sigmas using schedule
        sigmas = self.sigma_schedule(num_steps, device)
        # Append additional kwargs to denoise function (used e.g. for conditional unet)
        def fn(*a, **ka):
            return self.denoise_fn(*a, **{**ka, **kwargs})
        # Sample using sampler
        x = self.sampler(noise, fn=fn, sigmas=sigmas, num_steps=num_steps)
        if self.clamp:
            x = x.clamp(-1.0, 1.0)
        return x
class DiffusionInpainter(nn.Module):
    """Runs a sampler's `inpaint` routine with a fixed step count and schedule."""
    def __init__(
        self,
        diffusion: Diffusion,
        *,
        num_steps: int,
        num_resamples: int,
        sampler: Sampler,
        sigma_schedule: Schedule,
    ):
        super().__init__()
        self.denoise_fn = diffusion.denoise_fn
        self.num_steps = num_steps
        self.num_resamples = num_resamples
        self.inpaint_fn = sampler.inpaint
        self.sigma_schedule = sigma_schedule
    @torch.no_grad()
    def forward(self, inpaint: Tensor, inpaint_mask: Tensor) -> Tensor:
        """Inpaint `inpaint` under `inpaint_mask` with gradients disabled."""
        # Sigmas are created on the same device as the input
        sigmas = self.sigma_schedule(self.num_steps, inpaint.device)
        return self.inpaint_fn(
            source=inpaint,
            mask=inpaint_mask,
            fn=self.denoise_fn,
            sigmas=sigmas,
            num_steps=self.num_steps,
            num_resamples=self.num_resamples,
        )
def sequential_mask(like: Tensor, start: int) -> Tensor:
    """Return a boolean mask shaped like `like`, True before `start` along dim 2.

    Positions from index `start` onward on the third dimension are False.
    `start` follows normal slice semantics, so a value beyond the length
    leaves the mask all True instead of raising.
    """
    mask = torch.ones_like(like, dtype=torch.bool)
    # Assign the scalar directly: no temporary tensor, and no size mismatch
    # when `start` is negative or larger than the dimension.
    mask[:, :, start:] = False
    return mask
class SpanBySpanComposer(nn.Module):
    """Generates `num_spans` half-length spans, each inpainted from the previous one."""
    def __init__(
        self,
        inpainter: DiffusionInpainter,
        *,
        num_spans: int,
    ):
        super().__init__()
        self.inpainter = inpainter
        self.num_spans = num_spans
    def forward(self, start: Tensor, keep_start: bool = False) -> Tensor:
        """Return the generated spans concatenated along dim 2.

        With `keep_start` the two halves of `start` are placed in front of
        the generated spans.
        """
        half_length = start.shape[2] // 2
        spans = []
        if keep_start:
            spans = list(start.chunk(chunks=2, dim=-1))
        # Working buffer: first half is the known context, second half gets inpainted
        inpaint = torch.zeros_like(start)
        inpaint[:, :, :half_length] = start[:, :, half_length:]
        inpaint_mask = sequential_mask(like=start, start=half_length)
        for _ in range(self.num_spans):
            # Inpaint second half
            generated = self.inpainter(inpaint=inpaint, inpaint_mask=inpaint_mask)
            second_half = generated[:, :, half_length:]
            # The new second half becomes the context of the next round
            inpaint[:, :, :half_length] = second_half
            spans.append(second_half)
        return torch.cat(spans, dim=2)
class XDiffusion(nn.Module):
    """Wrapper that builds the diffusion class whose alias equals `type`."""
    def __init__(self, type: str, net: nn.Module, **kwargs):
        super().__init__()
        diffusion_classes = [VDiffusion, KDiffusion, VKDiffusion]
        aliases = [t.alias for t in diffusion_classes]  # type: ignore
        message = f"type='{type}' must be one of {*aliases,}"
        assert type in aliases, message
        self.net = net
        # Instantiate the class registered under the requested alias; the
        # remaining keyword arguments go to its constructor.
        for diffusion_cls in diffusion_classes:
            if diffusion_cls.alias == type:  # type: ignore
                self.diffusion = diffusion_cls(net=net, **kwargs)
    def forward(self, *args, **kwargs) -> Tensor:
        """Delegate to the wrapped diffusion's forward pass."""
        return self.diffusion(*args, **kwargs)
    def sample(
        self,
        noise: Tensor,
        num_steps: int,
        sigma_schedule: Schedule,
        sampler: Sampler,
        clamp: bool,
        **kwargs,
    ) -> Tensor:
        """Build a `DiffusionSampler` for the wrapped diffusion and sample from `noise`."""
        sampling_module = DiffusionSampler(
            diffusion=self.diffusion,
            sampler=sampler,
            sigma_schedule=sigma_schedule,
            num_steps=num_steps,
            clamp=clamp,
        )
        return sampling_module(noise, **kwargs)

Modules/diffusion/utils.py ADDED Viewed

	@@ -0,0 +1,82 @@

+from functools import reduce
+from inspect import isfunction
+from math import ceil, floor, log2, pi
+from typing import Callable, Dict, List, Optional, Sequence, Tuple, TypeVar, Union
+import torch
+import torch.nn.functional as F
+from einops import rearrange
+from torch import Generator, Tensor
+from typing_extensions import TypeGuard
+T = TypeVar("T")
+def exists(val: Optional[T]) -> TypeGuard[T]:
+    return val is not None
def iff(condition: bool, value: T) -> Optional[T]:
    """Return `value` when `condition` is truthy, otherwise ``None``."""
    if condition:
        return value
    return None
+def is_sequence(obj: T) -> TypeGuard[Union[list, tuple]]:
+    return isinstance(obj, list) or isinstance(obj, tuple)
def default(val: Optional[T], d: Union[Callable[..., T], T]) -> T:
    """Return `val` unless it is ``None``, in which case fall back to `d`.

    If `d` is a plain Python function (per ``inspect.isfunction``) it is
    called without arguments and its result is used; any other `d` is
    returned as is.
    """
    if not exists(val):
        return d() if isfunction(d) else d
    return val
def to_list(val: Union[T, Sequence[T]]) -> List[T]:
    """Coerce `val` to a list.

    A list is returned unchanged (same object), a tuple is converted, and any
    other value is wrapped in a one-element list.
    """
    if isinstance(val, list):
        return val
    if isinstance(val, tuple):
        return list(val)
    return [val]  # type: ignore
def prod(vals: Sequence[int]) -> int:
    """Return the product of all values in `vals`; an empty sequence gives 1."""
    # The initial value 1 makes the empty case well defined; without it
    # `reduce` raises TypeError on an empty sequence.
    return reduce(lambda x, y: x * y, vals, 1)
def closest_power_2(x: float) -> int:
    """Return the power of two nearest to `x`; ties go to the smaller power."""
    exponent = log2(x)
    lower, upper = floor(exponent), ceil(exponent)
    # Compare the distances of the two neighbouring powers of two to x
    lower_distance = abs(x - 2 ** lower)
    upper_distance = abs(x - 2 ** upper)
    nearest = lower if lower_distance <= upper_distance else upper
    return 2 ** int(nearest)
def rand_bool(shape, proba, device = None):
    """Return a boolean tensor of `shape` whose entries are True with probability `proba`.

    Probabilities of exactly 1 and 0 return constant tensors without drawing
    random numbers.
    """
    if proba == 1:
        return torch.ones(shape, device=device, dtype=torch.bool)
    if proba == 0:
        return torch.zeros(shape, device=device, dtype=torch.bool)
    # General case: independent Bernoulli draws, converted to bool
    probabilities = torch.full(shape, proba, device=device)
    return torch.bernoulli(probabilities).to(torch.bool)
+"""
+Kwargs Utils
+"""
def group_dict_by_prefix(prefix: str, d: Dict) -> Tuple[Dict, Dict]:
    """Split `d` into (entries whose key starts with `prefix`, all other entries).

    Keys are kept unchanged in both dictionaries.
    """
    with_prefix: Dict = {}
    without_prefix: Dict = {}
    for key in d.keys():
        target = with_prefix if key.startswith(prefix) else without_prefix
        target[key] = d[key]
    return with_prefix, without_prefix
def groupby(prefix: str, d: Dict, keep_prefix: bool = False) -> Tuple[Dict, Dict]:
    """Split `d` by key prefix, stripping the prefix from the matching keys.

    Returns (entries whose key starts with `prefix`, all other entries). The
    prefix is removed from the keys of the first dictionary unless
    `keep_prefix` is true.
    """
    matching, remaining = group_dict_by_prefix(prefix, d)
    if not keep_prefix:
        cut = len(prefix)
        matching = {key[cut:]: value for key, value in matching.items()}
    return matching, remaining
def prefix_dict(prefix: str, d: Dict) -> Dict:
    """Return a copy of `d` in which every key is ``prefix + str(key)``."""
    renamed = {}
    for key, value in d.items():
        renamed[prefix + str(key)] = value
    return renamed

Modules/hifigan.py ADDED Viewed

	@@ -0,0 +1,477 @@

+import torch
+import torch.nn.functional as F
+import torch.nn as nn
+from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
+from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
+from .utils import init_weights, get_padding
+import math
+import random
+import numpy as np
+LRELU_SLOPE = 0.1
class AdaIN1d(nn.Module):
    """Instance normalisation whose scale and shift are predicted from a style vector.

    A linear layer maps the style vector to ``2 * num_features`` values; the
    first half is used as ``gamma`` and the second as ``beta`` in
    ``(1 + gamma) * norm(x) + beta``.
    """
    def __init__(self, style_dim, num_features):
        super().__init__()
        self.norm = nn.InstanceNorm1d(num_features, affine=False)
        self.fc = nn.Linear(style_dim, num_features*2)
    def forward(self, x, s):
        """Normalise `x` and modulate it with the style `s`."""
        style = self.fc(s)
        # Add a trailing singleton dim so the style broadcasts over the last axis of x
        style = style.view(style.size(0), style.size(1), 1)
        gamma, beta = style.chunk(2, dim=1)
        normalized = self.norm(x)
        return (1 + gamma) * normalized + beta
class AdaINResBlock1(torch.nn.Module):
    """Residual block of three stages conditioned on a style vector.

    Every stage applies AdaIN, a Snake1D activation and a weight-normalised
    convolution twice (the first convolution is dilated) and adds the result
    to its input.
    """
    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5), style_dim=64):
        super().__init__()
        # First convolution of each stage: one dilation rate per stage
        self.convs1 = nn.ModuleList([
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[stage],
                               padding=get_padding(kernel_size, dilation[stage])))
            for stage in range(3)
        ])
        self.convs1.apply(init_weights)
        # Second convolution of each stage: no dilation
        self.convs2 = nn.ModuleList([
            weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
                               padding=get_padding(kernel_size, 1)))
            for _ in range(3)
        ])
        self.convs2.apply(init_weights)
        self.adain1 = nn.ModuleList([AdaIN1d(style_dim, channels) for _ in range(3)])
        self.adain2 = nn.ModuleList([AdaIN1d(style_dim, channels) for _ in range(3)])
        # Learnable per-channel Snake1D parameters, initialised to one
        self.alpha1 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for _ in self.convs1])
        self.alpha2 = nn.ParameterList([nn.Parameter(torch.ones(1, channels, 1)) for _ in self.convs2])
    def forward(self, x, s):
        """Pass `x` through all stages, conditioning every AdaIN layer on `s`."""
        def snake(t, alpha):
            # Snake1D
            return t + (1 / alpha) * (torch.sin(alpha * t) ** 2)
        stages = zip(self.convs1, self.convs2, self.adain1, self.adain2, self.alpha1, self.alpha2)
        for conv_a, conv_b, norm_a, norm_b, alpha_a, alpha_b in stages:
            h = snake(norm_a(x, s), alpha_a)
            h = conv_a(h)
            h = snake(norm_b(h, s), alpha_b)
            h = conv_b(h)
            # Residual connection
            x = h + x
        return x
    def remove_weight_norm(self):
        """Strip the weight-norm reparametrisation from every convolution."""
        for layer in [*self.convs1, *self.convs2]:
            remove_weight_norm(layer)
class SineGen(torch.nn.Module):
    """ Definition of sine generator
    SineGen(samp_rate, upsample_scale, harmonic_num = 0,
            sine_amp = 0.1, noise_std = 0.003,
            voiced_threshold = 0,
            flag_for_pulse=False)
    samp_rate: sampling rate in Hz
    upsample_scale: the phase is accumulated on a sequence shortened by this
        factor and interpolated back to the input length
    harmonic_num: number of harmonic overtones (default 0)
    sine_amp: amplitude of sine-waveform (default 0.1)
    noise_std: std of Gaussian noise (default 0.003)
    voiced_threshold: F0 threshold for U/V classification (default 0)
    flag_for_pulse: this SineGen is used inside PulseGen (default False)
    Note: when flag_for_pulse is True, the first time step of a voiced
        segment is always sin(np.pi) or cos(0)
    """
    def __init__(self, samp_rate, upsample_scale, harmonic_num=0,
                 sine_amp=0.1, noise_std=0.003,
                 voiced_threshold=0,
                 flag_for_pulse=False):
        super().__init__()
        self.sine_amp = sine_amp
        self.noise_std = noise_std
        self.harmonic_num = harmonic_num
        # Fundamental plus overtones
        self.dim = self.harmonic_num + 1
        self.sampling_rate = samp_rate
        self.voiced_threshold = voiced_threshold
        self.flag_for_pulse = flag_for_pulse
        self.upsample_scale = upsample_scale
    def _f02uv(self, f0):
        """Return a float32 mask that is 1.0 where `f0` exceeds the voiced threshold."""
        uv = (f0 > self.voiced_threshold).type(torch.float32)
        return uv
    def _f02sine(self, f0_values):
        """ f0_values: (batchsize, length, dim)
            where dim indicates fundamental tone and overtones
        """
        # convert to F0 in rad. The integer part n can be ignored
        # because 2 * np.pi * n doesn't affect phase
        rad_values = (f0_values / self.sampling_rate) % 1
        # initial phase noise (no noise for fundamental component)
        rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2],
                              device=f0_values.device)
        rand_ini[:, 0] = 0
        rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini
        # instantaneous phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad)
        if not self.flag_for_pulse:
            # Accumulate the phase on a sequence shortened by upsample_scale,
            # then rescale and interpolate it back to the input length.
            rad_values = torch.nn.functional.interpolate(rad_values.transpose(1, 2),
                                                         scale_factor=1/self.upsample_scale,
                                                         mode="linear").transpose(1, 2)
            phase = torch.cumsum(rad_values, dim=1) * 2 * np.pi
            phase = torch.nn.functional.interpolate(phase.transpose(1, 2) * self.upsample_scale,
                                                    scale_factor=self.upsample_scale, mode="linear").transpose(1, 2)
            sines = torch.sin(phase)
        else:
            # If necessary, make sure that the first time step of every
            # voiced segments is sin(pi) or cos(0)
            # This is used for pulse-train generation
            # identify the last time step in unvoiced segments
            uv = self._f02uv(f0_values)
            uv_1 = torch.roll(uv, shifts=-1, dims=1)
            uv_1[:, -1, :] = 1
            u_loc = (uv < 1) * (uv_1 > 0)
            # get the instantaneous phase
            tmp_cumsum = torch.cumsum(rad_values, dim=1)
            # different batch needs to be processed differently
            for idx in range(f0_values.shape[0]):
                temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :]
                temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :]
                # stores the accumulation of i.phase within
                # each voiced segments
                tmp_cumsum[idx, :, :] = 0
                tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum
            # rad_values - tmp_cumsum: remove the accumulation of i.phase
            # within the previous voiced segment.
            i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1)
            # get the sines
            sines = torch.cos(i_phase * 2 * np.pi)
        return sines
    def forward(self, f0):
        """ sine_tensor, uv, noise = forward(f0)
        input F0: tensor(batchsize=1, length, dim=1)
                  f0 for unvoiced steps should be 0
        output sine_tensor: tensor(batchsize=1, length, dim)
        output uv: tensor(batchsize=1, length, 1)
        output noise: the additive noise that was mixed into sine_tensor
        """
        # F0 of the fundamental and of every overtone: f0 * [1, 2, ..., dim]
        harmonics = torch.arange(1, self.harmonic_num + 2, dtype=torch.float32,
                                 device=f0.device).view(1, 1, -1)
        fn = torch.multiply(f0, harmonics)
        # generate sine waveforms
        sine_waves = self._f02sine(fn) * self.sine_amp
        # generate uv signal
        uv = self._f02uv(f0)
        # noise: for unvoiced should be similar to sine_amp
        #        std = self.sine_amp/3 -> max value ~ self.sine_amp
        #        for voiced regions is self.noise_std
        noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3
        noise = noise_amp * torch.randn_like(sine_waves)
        # first: set the unvoiced part to 0 by uv
        # then: additive noise
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise
+class SourceModuleHnNSF(torch.nn.Module):
+    """ SourceModule for hn-nsf
+    SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1,
+                 add_noise_std=0.003, voiced_threshod=0)
+    sampling_rate: sampling_rate in Hz
+    harmonic_num: number of harmonic above F0 (default: 0)
+    sine_amp: amplitude of sine source signal (default: 0.1)
+    add_noise_std: std of additive Gaussian noise (default: 0.003)
+        note that amplitude of noise in unvoiced is decided
+        by sine_amp
+    voiced_threshold: threhold to set U/V given F0 (default: 0)
+    Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
+    F0_sampled (batchsize, length, 1)
+    Sine_source (batchsize, length, 1)
+    noise_source (batchsize, length 1)
+    uv (batchsize, length, 1)
+    """
+    def __init__(self, sampling_rate, upsample_scale, harmonic_num=0, sine_amp=0.1,
+                 add_noise_std=0.003, voiced_threshod=0):
+        super(SourceModuleHnNSF, self).__init__()
+        self.sine_amp = sine_amp
+        self.noise_std = add_noise_std
+        # to produce sine waveforms
+        self.l_sin_gen = SineGen(sampling_rate, upsample_scale, harmonic_num,
+                                 sine_amp, add_noise_std, voiced_threshod)
+        # to merge source harmonics into a single excitation
+        self.l_linear = torch.nn.Linear(harmonic_num + 1, 1)
+        self.l_tanh = torch.nn.Tanh()
+    def forward(self, x):
+        """
+        Sine_source, noise_source = SourceModuleHnNSF(F0_sampled)
+        F0_sampled (batchsize, length, 1)
+        Sine_source (batchsize, length, 1)
+        noise_source (batchsize, length 1)
+        """
+        # source for harmonic branch
+        with torch.no_grad():
+            sine_wavs, uv, _ = self.l_sin_gen(x)
+        sine_merge = self.l_tanh(self.l_linear(sine_wavs))
+        # source for noise branch, in the same shape as uv
+        noise = torch.randn_like(uv) * self.sine_amp / 3
+        return sine_merge, noise, uv
+def padDiff(x):
+    return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0)
+class Generator(torch.nn.Module):
+    def __init__(self, style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes):
+        super(Generator, self).__init__()
+        self.num_kernels = len(resblock_kernel_sizes)
+        self.num_upsamples = len(upsample_rates)
+        resblock = AdaINResBlock1
+        self.m_source = SourceModuleHnNSF(
+                    sampling_rate=24000,
+                    upsample_scale=np.prod(upsample_rates),
+                    harmonic_num=8, voiced_threshod=10)
+        self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates))
+        self.noise_convs = nn.ModuleList()
+        self.ups = nn.ModuleList()
+        self.noise_res = nn.ModuleList()
+        for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)):
+            c_cur = upsample_initial_channel // (2 ** (i + 1))
+            self.ups.append(weight_norm(ConvTranspose1d(upsample_initial_channel//(2**i),
+                         upsample_initial_channel//(2**(i+1)),
+                         k, u, padding=(u//2 + u%2), output_padding=u%2)))
+            if i + 1 < len(upsample_rates):  #
+                stride_f0 = np.prod(upsample_rates[i + 1:])
+                self.noise_convs.append(Conv1d(
+                    1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=(stride_f0+1) // 2))
+                self.noise_res.append(resblock(c_cur, 7, [1,3,5], style_dim))
+            else:
+                self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1))
+                self.noise_res.append(resblock(c_cur, 11, [1,3,5], style_dim))
+        self.resblocks = nn.ModuleList()
+        self.alphas = nn.ParameterList()
+        self.alphas.append(nn.Parameter(torch.ones(1, upsample_initial_channel, 1)))
+        for i in range(len(self.ups)):
+            ch = upsample_initial_channel//(2**(i+1))
+            self.alphas.append(nn.Parameter(torch.ones(1, ch, 1)))
+            for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)):
+                self.resblocks.append(resblock(ch, k, d, style_dim))
+        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
+        self.ups.apply(init_weights)
+        self.conv_post.apply(init_weights)
+    def forward(self, x, s, f0):
+        f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2)  # bs,n,t
+        har_source, noi_source, uv = self.m_source(f0)
+        har_source = har_source.transpose(1, 2)
+        for i in range(self.num_upsamples):
+            x = x + (1 / self.alphas[i]) * (torch.sin(self.alphas[i] * x) ** 2)
+            x_source = self.noise_convs[i](har_source)
+            x_source = self.noise_res[i](x_source, s)
+            x = self.ups[i](x)
+            x = x + x_source
+            xs = None
+            for j in range(self.num_kernels):
+                if xs is None:
+                    xs = self.resblocks[i*self.num_kernels+j](x, s)
+                else:
+                    xs += self.resblocks[i*self.num_kernels+j](x, s)
+            x = xs / self.num_kernels
+        x = x + (1 / self.alphas[i+1]) * (torch.sin(self.alphas[i+1] * x) ** 2)
+        x = self.conv_post(x)
+        x = torch.tanh(x)
+        return x
+    def remove_weight_norm(self):
+        print('Removing weight norm...')
+        for l in self.ups:
+            remove_weight_norm(l)
+        for l in self.resblocks:
+            l.remove_weight_norm()
+        remove_weight_norm(self.conv_pre)
+        remove_weight_norm(self.conv_post)
+class AdainResBlk1d(nn.Module):
+    def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
+                 upsample='none', dropout_p=0.0):
+        super().__init__()
+        self.actv = actv
+        self.upsample_type = upsample
+        self.upsample = UpSample1d(upsample)
+        self.learned_sc = dim_in != dim_out
+        self._build_weights(dim_in, dim_out, style_dim)
+        self.dropout = nn.Dropout(dropout_p)
+        if upsample == 'none':
+            self.pool = nn.Identity()
+        else:
+            self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
+    def _build_weights(self, dim_in, dim_out, style_dim):
+        self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
+        self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
+        self.norm1 = AdaIN1d(style_dim, dim_in)
+        self.norm2 = AdaIN1d(style_dim, dim_out)
+        if self.learned_sc:
+            self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
+    def _shortcut(self, x):
+        x = self.upsample(x)
+        if self.learned_sc:
+            x = self.conv1x1(x)
+        return x
+    def _residual(self, x, s):
+        x = self.norm1(x, s)
+        x = self.actv(x)
+        x = self.pool(x)
+        x = self.conv1(self.dropout(x))
+        x = self.norm2(x, s)
+        x = self.actv(x)
+        x = self.conv2(self.dropout(x))
+        return x
+    def forward(self, x, s):
+        out = self._residual(x, s)
+        out = (out + self._shortcut(x)) / math.sqrt(2)
+        return out
+class UpSample1d(nn.Module):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        else:
+            return F.interpolate(x, scale_factor=2, mode='nearest')
+class Decoder(nn.Module):
+    def __init__(self, dim_in=512, F0_channel=512, style_dim=64, dim_out=80,
+                resblock_kernel_sizes = [3,7,11],
+                upsample_rates = [10,5,3,2],
+                upsample_initial_channel=512,
+                resblock_dilation_sizes=[[1,3,5], [1,3,5], [1,3,5]],
+                upsample_kernel_sizes=[20,10,6,4]):
+        super().__init__()
+        self.decode = nn.ModuleList()
+        self.encode = AdainResBlk1d(dim_in + 2, 1024, style_dim)
+        self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
+        self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
+        self.decode.append(AdainResBlk1d(1024 + 2 + 64, 1024, style_dim))
+        self.decode.append(AdainResBlk1d(1024 + 2 + 64, 512, style_dim, upsample=True))
+        self.F0_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
+        self.N_conv = weight_norm(nn.Conv1d(1, 1, kernel_size=3, stride=2, groups=1, padding=1))
+        self.asr_res = nn.Sequential(
+            weight_norm(nn.Conv1d(512, 64, kernel_size=1)),
+        )
+        self.generator = Generator(style_dim, resblock_kernel_sizes, upsample_rates, upsample_initial_channel, resblock_dilation_sizes, upsample_kernel_sizes)
+    def forward(self, asr, F0_curve, N, s):
+        if self.training:
+            downlist = [0, 3, 7]
+            F0_down = downlist[random.randint(0, 2)]
+            downlist = [0, 3, 7, 15]
+            N_down = downlist[random.randint(0, 3)]
+            if F0_down:
+                F0_curve = nn.functional.conv1d(F0_curve.unsqueeze(1), torch.ones(1, 1, F0_down).to('cuda'), padding=F0_down//2).squeeze(1) / F0_down
+            if N_down:
+                N = nn.functional.conv1d(N.unsqueeze(1), torch.ones(1, 1, N_down).to('cuda'), padding=N_down//2).squeeze(1)  / N_down
+        F0 = self.F0_conv(F0_curve.unsqueeze(1))
+        N = self.N_conv(N.unsqueeze(1))
+        x = torch.cat([asr, F0, N], axis=1)
+        x = self.encode(x, s)
+        asr_res = self.asr_res(asr)
+        res = True
+        for block in self.decode:
+            if res:
+                x = torch.cat([x, asr_res, F0, N], axis=1)
+            x = block(x, s)
+            if block.upsample_type != "none":
+                res = False
+        x = self.generator(x, s, F0_curve)
+        return x

Modules/utils.py ADDED Viewed

	@@ -0,0 +1,14 @@

+def init_weights(m, mean=0.0, std=0.01):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        m.weight.data.normal_(mean, std)
+def apply_weight_norm(m):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        weight_norm(m)
+def get_padding(kernel_size, dilation=1):
+    return int((kernel_size*dilation - dilation)/2)

README.md CHANGED Viewed

@@ -11,4 +11,13 @@ license: mit
 short_description: Build custom voices in StyleTTS 2
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 short_description: Build custom voices in StyleTTS 2
 ---
+# StyleTTS2 Studio
+Customizable Voices for the StyleTTS2 text-to-speech model based on [StyleTTS2](https://github.com/yl4579/StyleTTS2) and [artificial StyleTTS2](https://huggingface.co/dkounadis/artificial-styletts2/tree/main).
+I used Label Studio to label 50 randomly generated voices with the following 6 features: Gender, Tone, Quality, Pace, Enunciation and Style.
+These 6 features were used for a Principal Component Analysis (PCA) to reduce the 256-dimensional style vector into manageable dimensions. The results can likely be further enhanced by selecting better features and labeling more samples, but these first results show that it's generally possible to dial in specific voice features.
+**Disclaimer from the original StyleTTS2 repo:**
+Pre-Trained Models: Before using these pre-trained models, you agree to inform the listeners that the speech samples are synthesized by the pre-trained models, unless you have the permission to use the voice you synthesize. That is, you agree to only use voices whose speakers grant the permission to have their voice cloned, either directly or by license before making synthesized voices public, or you have to publicly announce that these voices are synthesized if you do not have the permission to use these voices.

Utils/ASR/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

Utils/ASR/config.yml ADDED Viewed

	@@ -0,0 +1,29 @@

+log_dir: "logs/20201006"
+save_freq: 5
+device: "cuda"
+epochs: 180
+batch_size: 64
+pretrained_model: ""
+train_data: "ASRDataset/train_list.txt"
+val_data: "ASRDataset/val_list.txt"
+dataset_params:
+  data_augmentation: false
+preprocess_parasm:
+  sr: 24000
+  spect_params:
+    n_fft: 2048
+    win_length: 1200
+    hop_length: 300
+  mel_params:
+    n_mels: 80
+model_params:
+   input_dim: 80
+   hidden_dim: 256
+   n_token: 178
+   token_embedding_dim: 512
+optimizer_params:
+  lr: 0.0005

Utils/ASR/epoch_00080.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fedd55a1234b0c56e1e8b509c74edf3a5e2f27106a66038a4a946047a775bd6c
+size 94552811

Utils/ASR/layers.py ADDED Viewed

	@@ -0,0 +1,354 @@

+import math
+import torch
+from torch import nn
+from typing import Optional, Any
+from torch import Tensor
+import torch.nn.functional as F
+import torchaudio
+import torchaudio.functional as audio_F
+import random
+random.seed(0)
+def _get_activation_fn(activ):
+    if activ == 'relu':
+        return nn.ReLU()
+    elif activ == 'lrelu':
+        return nn.LeakyReLU(0.2)
+    elif activ == 'swish':
+        return lambda x: x*torch.sigmoid(x)
+    else:
+        raise RuntimeError('Unexpected activ type %s, expected [relu, lrelu, swish]' % activ)
+class LinearNorm(torch.nn.Module):
+    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
+        super(LinearNorm, self).__init__()
+        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
+        torch.nn.init.xavier_uniform_(
+            self.linear_layer.weight,
+            gain=torch.nn.init.calculate_gain(w_init_gain))
+    def forward(self, x):
+        return self.linear_layer(x)
+class ConvNorm(torch.nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
+                 padding=None, dilation=1, bias=True, w_init_gain='linear', param=None):
+        super(ConvNorm, self).__init__()
+        if padding is None:
+            assert(kernel_size % 2 == 1)
+            padding = int(dilation * (kernel_size - 1) / 2)
+        self.conv = torch.nn.Conv1d(in_channels, out_channels,
+                                    kernel_size=kernel_size, stride=stride,
+                                    padding=padding, dilation=dilation,
+                                    bias=bias)
+        torch.nn.init.xavier_uniform_(
+            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain, param=param))
+    def forward(self, signal):
+        conv_signal = self.conv(signal)
+        return conv_signal
+class CausualConv(nn.Module):
+    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=1, dilation=1, bias=True, w_init_gain='linear', param=None):
+        super(CausualConv, self).__init__()
+        if padding is None:
+            assert(kernel_size % 2 == 1)
+            padding = int(dilation * (kernel_size - 1) / 2) * 2
+        else:
+            self.padding = padding * 2
+        self.conv = nn.Conv1d(in_channels, out_channels,
+                              kernel_size=kernel_size, stride=stride,
+                              padding=self.padding,
+                              dilation=dilation,
+                              bias=bias)
+        torch.nn.init.xavier_uniform_(
+            self.conv.weight, gain=torch.nn.init.calculate_gain(w_init_gain, param=param))
+    def forward(self, x):
+        x = self.conv(x)
+        x = x[:, :, :-self.padding]
+        return x
+class CausualBlock(nn.Module):
+    def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ='lrelu'):
+        super(CausualBlock, self).__init__()
+        self.blocks = nn.ModuleList([
+            self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
+            for i in range(n_conv)])
+    def forward(self, x):
+        for block in self.blocks:
+            res = x
+            x = block(x)
+            x += res
+        return x
+    def _get_conv(self, hidden_dim, dilation, activ='lrelu', dropout_p=0.2):
+        layers = [
+            CausualConv(hidden_dim, hidden_dim, kernel_size=3, padding=dilation, dilation=dilation),
+            _get_activation_fn(activ),
+            nn.BatchNorm1d(hidden_dim),
+            nn.Dropout(p=dropout_p),
+            CausualConv(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
+            _get_activation_fn(activ),
+            nn.Dropout(p=dropout_p)
+        ]
+        return nn.Sequential(*layers)
+class ConvBlock(nn.Module):
+    def __init__(self, hidden_dim, n_conv=3, dropout_p=0.2, activ='relu'):
+        super().__init__()
+        self._n_groups = 8
+        self.blocks = nn.ModuleList([
+            self._get_conv(hidden_dim, dilation=3**i, activ=activ, dropout_p=dropout_p)
+            for i in range(n_conv)])
+    def forward(self, x):
+        for block in self.blocks:
+            res = x
+            x = block(x)
+            x += res
+        return x
+    def _get_conv(self, hidden_dim, dilation, activ='relu', dropout_p=0.2):
+        layers = [
+            ConvNorm(hidden_dim, hidden_dim, kernel_size=3, padding=dilation, dilation=dilation),
+            _get_activation_fn(activ),
+            nn.GroupNorm(num_groups=self._n_groups, num_channels=hidden_dim),
+            nn.Dropout(p=dropout_p),
+            ConvNorm(hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1),
+            _get_activation_fn(activ),
+            nn.Dropout(p=dropout_p)
+        ]
+        return nn.Sequential(*layers)
+class LocationLayer(nn.Module):
+    def __init__(self, attention_n_filters, attention_kernel_size,
+                 attention_dim):
+        super(LocationLayer, self).__init__()
+        padding = int((attention_kernel_size - 1) / 2)
+        self.location_conv = ConvNorm(2, attention_n_filters,
+                                      kernel_size=attention_kernel_size,
+                                      padding=padding, bias=False, stride=1,
+                                      dilation=1)
+        self.location_dense = LinearNorm(attention_n_filters, attention_dim,
+                                         bias=False, w_init_gain='tanh')
+    def forward(self, attention_weights_cat):
+        processed_attention = self.location_conv(attention_weights_cat)
+        processed_attention = processed_attention.transpose(1, 2)
+        processed_attention = self.location_dense(processed_attention)
+        return processed_attention
+class Attention(nn.Module):
+    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
+                 attention_location_n_filters, attention_location_kernel_size):
+        super(Attention, self).__init__()
+        self.query_layer = LinearNorm(attention_rnn_dim, attention_dim,
+                                      bias=False, w_init_gain='tanh')
+        self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False,
+                                       w_init_gain='tanh')
+        self.v = LinearNorm(attention_dim, 1, bias=False)
+        self.location_layer = LocationLayer(attention_location_n_filters,
+                                            attention_location_kernel_size,
+                                            attention_dim)
+        self.score_mask_value = -float("inf")
+    def get_alignment_energies(self, query, processed_memory,
+                               attention_weights_cat):
+        """
+        PARAMS
+        ------
+        query: decoder output (batch, n_mel_channels * n_frames_per_step)
+        processed_memory: processed encoder outputs (B, T_in, attention_dim)
+        attention_weights_cat: cumulative and prev. att weights (B, 2, max_time)
+        RETURNS
+        -------
+        alignment (batch, max_time)
+        """
+        processed_query = self.query_layer(query.unsqueeze(1))
+        processed_attention_weights = self.location_layer(attention_weights_cat)
+        energies = self.v(torch.tanh(
+            processed_query + processed_attention_weights + processed_memory))
+        energies = energies.squeeze(-1)
+        return energies
+    def forward(self, attention_hidden_state, memory, processed_memory,
+                attention_weights_cat, mask):
+        """
+        PARAMS
+        ------
+        attention_hidden_state: attention rnn last output
+        memory: encoder outputs
+        processed_memory: processed encoder outputs
+        attention_weights_cat: previous and cummulative attention weights
+        mask: binary mask for padded data
+        """
+        alignment = self.get_alignment_energies(
+            attention_hidden_state, processed_memory, attention_weights_cat)
+        if mask is not None:
+            alignment.data.masked_fill_(mask, self.score_mask_value)
+        attention_weights = F.softmax(alignment, dim=1)
+        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
+        attention_context = attention_context.squeeze(1)
+        return attention_context, attention_weights
+class ForwardAttentionV2(nn.Module):
+    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
+                 attention_location_n_filters, attention_location_kernel_size):
+        super(ForwardAttentionV2, self).__init__()
+        self.query_layer = LinearNorm(attention_rnn_dim, attention_dim,
+                                      bias=False, w_init_gain='tanh')
+        self.memory_layer = LinearNorm(embedding_dim, attention_dim, bias=False,
+                                       w_init_gain='tanh')
+        self.v = LinearNorm(attention_dim, 1, bias=False)
+        self.location_layer = LocationLayer(attention_location_n_filters,
+                                            attention_location_kernel_size,
+                                            attention_dim)
+        self.score_mask_value = -float(1e20)
+    def get_alignment_energies(self, query, processed_memory,
+                               attention_weights_cat):
+        """
+        PARAMS
+        ------
+        query: decoder output (batch, n_mel_channels * n_frames_per_step)
+        processed_memory: processed encoder outputs (B, T_in, attention_dim)
+        attention_weights_cat:  prev. and cumulative att weights (B, 2, max_time)
+        RETURNS
+        -------
+        alignment (batch, max_time)
+        """
+        processed_query = self.query_layer(query.unsqueeze(1))
+        processed_attention_weights = self.location_layer(attention_weights_cat)
+        energies = self.v(torch.tanh(
+            processed_query + processed_attention_weights + processed_memory))
+        energies = energies.squeeze(-1)
+        return energies
+    def forward(self, attention_hidden_state, memory, processed_memory,
+                attention_weights_cat, mask, log_alpha):
+        """
+        PARAMS
+        ------
+        attention_hidden_state: attention rnn last output
+        memory: encoder outputs
+        processed_memory: processed encoder outputs
+        attention_weights_cat: previous and cummulative attention weights
+        mask: binary mask for padded data
+        """
+        log_energy = self.get_alignment_energies(
+            attention_hidden_state, processed_memory, attention_weights_cat)
+        #log_energy =
+        if mask is not None:
+            log_energy.data.masked_fill_(mask, self.score_mask_value)
+        #attention_weights = F.softmax(alignment, dim=1)
+        #content_score = log_energy.unsqueeze(1) #[B, MAX_TIME] -> [B, 1, MAX_TIME]
+        #log_alpha = log_alpha.unsqueeze(2) #[B, MAX_TIME] -> [B, MAX_TIME, 1]
+        #log_total_score = log_alpha + content_score
+        #previous_attention_weights = attention_weights_cat[:,0,:]
+        log_alpha_shift_padded = []
+        max_time = log_energy.size(1)
+        for sft in range(2):
+            shifted = log_alpha[:,:max_time-sft]
+            shift_padded = F.pad(shifted, (sft,0), 'constant', self.score_mask_value)
+            log_alpha_shift_padded.append(shift_padded.unsqueeze(2))
+        biased = torch.logsumexp(torch.cat(log_alpha_shift_padded,2), 2)
+        log_alpha_new = biased +  log_energy
+        attention_weights =  F.softmax(log_alpha_new, dim=1)
+        attention_context = torch.bmm(attention_weights.unsqueeze(1), memory)
+        attention_context = attention_context.squeeze(1)
+        return attention_context, attention_weights, log_alpha_new
+class PhaseShuffle2d(nn.Module):
+    def __init__(self, n=2):
+        super(PhaseShuffle2d, self).__init__()
+        self.n = n
+        self.random = random.Random(1)
+    def forward(self, x, move=None):
+        # x.size = (B, C, M, L)
+        if move is None:
+            move = self.random.randint(-self.n, self.n)
+        if move == 0:
+            return x
+        else:
+            left = x[:, :, :, :move]
+            right = x[:, :, :, move:]
+            shuffled = torch.cat([right, left], dim=3)
+        return shuffled
+class PhaseShuffle1d(nn.Module):
+    def __init__(self, n=2):
+        super(PhaseShuffle1d, self).__init__()
+        self.n = n
+        self.random = random.Random(1)
+    def forward(self, x, move=None):
+        # x.size = (B, C, M, L)
+        if move is None:
+            move = self.random.randint(-self.n, self.n)
+        if move == 0:
+            return x
+        else:
+            left = x[:, :,  :move]
+            right = x[:, :, move:]
+            shuffled = torch.cat([right, left], dim=2)
+        return shuffled
+class MFCC(nn.Module):
+    def __init__(self, n_mfcc=40, n_mels=80):
+        super(MFCC, self).__init__()
+        self.n_mfcc = n_mfcc
+        self.n_mels = n_mels
+        self.norm = 'ortho'
+        dct_mat = audio_F.create_dct(self.n_mfcc, self.n_mels, self.norm)
+        self.register_buffer('dct_mat', dct_mat)
+    def forward(self, mel_specgram):
+        if len(mel_specgram.shape) == 2:
+            mel_specgram = mel_specgram.unsqueeze(0)
+            unsqueezed = True
+        else:
+            unsqueezed = False
+        # (channel, n_mels, time).tranpose(...) dot (n_mels, n_mfcc)
+        # -> (channel, time, n_mfcc).tranpose(...)
+        mfcc = torch.matmul(mel_specgram.transpose(1, 2), self.dct_mat).transpose(1, 2)
+        # unpack batch
+        if unsqueezed:
+            mfcc = mfcc.squeeze(0)
+        return mfcc

Utils/ASR/models.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import math
+import torch
+from torch import nn
+from torch.nn import TransformerEncoder
+import torch.nn.functional as F
+from .layers import MFCC, Attention, LinearNorm, ConvNorm, ConvBlock
+class ASRCNN(nn.Module):
+    def __init__(self,
+                 input_dim=80,
+                 hidden_dim=256,
+                 n_token=35,
+                 n_layers=6,
+                 token_embedding_dim=256,
+    ):
+        super().__init__()
+        self.n_token = n_token
+        self.n_down = 1
+        self.to_mfcc = MFCC()
+        self.init_cnn = ConvNorm(input_dim//2, hidden_dim, kernel_size=7, padding=3, stride=2)
+        self.cnns = nn.Sequential(
+            *[nn.Sequential(
+                ConvBlock(hidden_dim),
+                nn.GroupNorm(num_groups=1, num_channels=hidden_dim)
+            ) for n in range(n_layers)])
+        self.projection = ConvNorm(hidden_dim, hidden_dim // 2)
+        self.ctc_linear = nn.Sequential(
+            LinearNorm(hidden_dim//2, hidden_dim),
+            nn.ReLU(),
+            LinearNorm(hidden_dim, n_token))
+        self.asr_s2s = ASRS2S(
+            embedding_dim=token_embedding_dim,
+            hidden_dim=hidden_dim//2,
+            n_token=n_token)
+    def forward(self, x, src_key_padding_mask=None, text_input=None):
+        x = self.to_mfcc(x)
+        x = self.init_cnn(x)
+        x = self.cnns(x)
+        x = self.projection(x)
+        x = x.transpose(1, 2)
+        ctc_logit = self.ctc_linear(x)
+        if text_input is not None:
+            _, s2s_logit, s2s_attn = self.asr_s2s(x, src_key_padding_mask, text_input)
+            return ctc_logit, s2s_logit, s2s_attn
+        else:
+            return ctc_logit
+    def get_feature(self, x):
+        x = self.to_mfcc(x.squeeze(1))
+        x = self.init_cnn(x)
+        x = self.cnns(x)
+        x = self.projection(x)
+        return x
+    def length_to_mask(self, lengths):
+        mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
+        mask = torch.gt(mask+1, lengths.unsqueeze(1)).to(lengths.device)
+        return mask
+    def get_future_mask(self, out_length, unmask_future_steps=0):
+        """
+        Args:
+            out_length (int): returned mask shape is (out_length, out_length).
+            unmask_futre_steps (int): unmasking future step size.
+        Return:
+            mask (torch.BoolTensor): mask future timesteps mask[i, j] = True if i > j + unmask_future_steps else False
+        """
+        index_tensor = torch.arange(out_length).unsqueeze(0).expand(out_length, -1)
+        mask = torch.gt(index_tensor, index_tensor.T + unmask_future_steps)
+        return mask
+class ASRS2S(nn.Module):
+    def __init__(self,
+                 embedding_dim=256,
+                 hidden_dim=512,
+                 n_location_filters=32,
+                 location_kernel_size=63,
+                 n_token=40):
+        super(ASRS2S, self).__init__()
+        self.embedding = nn.Embedding(n_token, embedding_dim)
+        val_range = math.sqrt(6 / hidden_dim)
+        self.embedding.weight.data.uniform_(-val_range, val_range)
+        self.decoder_rnn_dim = hidden_dim
+        self.project_to_n_symbols = nn.Linear(self.decoder_rnn_dim, n_token)
+        self.attention_layer = Attention(
+            self.decoder_rnn_dim,
+            hidden_dim,
+            hidden_dim,
+            n_location_filters,
+            location_kernel_size
+        )
+        self.decoder_rnn = nn.LSTMCell(self.decoder_rnn_dim + embedding_dim, self.decoder_rnn_dim)
+        self.project_to_hidden = nn.Sequential(
+            LinearNorm(self.decoder_rnn_dim * 2, hidden_dim),
+            nn.Tanh())
+        self.sos = 1
+        self.eos = 2
+    def initialize_decoder_states(self, memory, mask):
+        """
+        moemory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
+        """
+        B, L, H = memory.shape
+        self.decoder_hidden = torch.zeros((B, self.decoder_rnn_dim)).type_as(memory)
+        self.decoder_cell = torch.zeros((B, self.decoder_rnn_dim)).type_as(memory)
+        self.attention_weights = torch.zeros((B, L)).type_as(memory)
+        self.attention_weights_cum = torch.zeros((B, L)).type_as(memory)
+        self.attention_context = torch.zeros((B, H)).type_as(memory)
+        self.memory = memory
+        self.processed_memory = self.attention_layer.memory_layer(memory)
+        self.mask = mask
+        self.unk_index = 3
+        self.random_mask = 0.1
+    def forward(self, memory, memory_mask, text_input):
+        """
+        Teacher-forced decoding of ``text_input`` against ``memory``.
+        memory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
+        memory_mask.shape = (B, L, )
+        text_input.shape = (B, T)
+        Returns the (hidden_outputs, logit_outputs, alignments) triple produced
+        by ``parse_decoder_outputs`` over the T + 1 decoding steps.
+        """
+        self.initialize_decoder_states(memory, memory_mask)
+        # text random mask: each token is replaced by ``unk_index`` with
+        # probability ``random_mask`` (the mask is drawn, then moved to the
+        # device of the text)
+        drop_positions = (torch.rand(text_input.shape) < self.random_mask).to(text_input.device)
+        masked_text = text_input.clone()
+        masked_text.masked_fill_(drop_positions, self.unk_index)
+        token_embeddings = self.embedding(masked_text).transpose(0, 1) # -> [T, B, channel]
+        # Prepend one start-of-sequence embedding per batch element
+        batch_size = token_embeddings.size(1)
+        sos_ids = torch.LongTensor([self.sos]*batch_size).to(token_embeddings.device)
+        sos_embedding = self.embedding(sos_ids).unsqueeze(0)
+        decoder_inputs = torch.cat((sos_embedding, token_embeddings), dim=0)
+        hidden_outputs, logit_outputs, alignments = [], [], []
+        for step in range(decoder_inputs.size(0)):
+            hidden, logit, attention_weights = self.decode(decoder_inputs[step])
+            hidden_outputs.append(hidden)
+            logit_outputs.append(logit)
+            alignments.append(attention_weights)
+        return self.parse_decoder_outputs(hidden_outputs, logit_outputs, alignments)
+    def decode(self, decoder_input):
+        """
+        Run one decoding step and update the stored decoder / attention state.
+        Returns (hidden, logit, attention_weights) for this step.
+        """
+        # The LSTM cell sees the current input joined with the previous context
+        rnn_input = torch.cat((decoder_input, self.attention_context), -1)
+        rnn_state = (self.decoder_hidden, self.decoder_cell)
+        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(rnn_input, rnn_state)
+        # Last-step and cumulative attention weights, stacked on a new axis 1
+        stacked_weights = torch.cat(
+            (self.attention_weights.unsqueeze(1),
+            self.attention_weights_cum.unsqueeze(1)),dim=1)
+        self.attention_context, self.attention_weights = self.attention_layer(
+            self.decoder_hidden,
+            self.memory,
+            self.processed_memory,
+            stacked_weights,
+            self.mask)
+        # In-place accumulation of the attention history
+        self.attention_weights_cum += self.attention_weights
+        rnn_and_context = torch.cat((self.decoder_hidden, self.attention_context), -1)
+        projected = self.project_to_hidden(rnn_and_context)
+        # dropout (p=0.5, training mode only) before the symbol projection,
+        # presumably to improve generalization
+        symbol_logits = self.project_to_n_symbols(F.dropout(projected, 0.5, self.training))
+        return projected, symbol_logits, self.attention_weights
+    def parse_decoder_outputs(self, hidden, logit, alignments):
+        """Stack the per-step output lists and move the batch axis to the front."""
+        def batch_first(steps):
+            # list of [B, ...] tensors -> [steps, B, ...] -> [B, steps, ...]
+            return torch.stack(steps).transpose(0, 1)
+        # -> [B, T_out + 1, max_time] (returned as a transposed view)
+        stacked_alignments = batch_first(alignments)
+        # [T_out + 1, B, n_symbols] -> [B, T_out + 1,  n_symbols]
+        stacked_logit = batch_first(logit).contiguous()
+        stacked_hidden = batch_first(hidden).contiguous()
+        return stacked_hidden, stacked_logit, stacked_alignments

Utils/JDC/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

Utils/JDC/bst.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54dc94364b97e18ac1dfa6287714ed121248cfaac4cfd39d061c6e0a089ef169
+size 21029926

Utils/JDC/model.py ADDED Viewed

	@@ -0,0 +1,190 @@

+"""
+Implementation of model from:
+Kum et al. - "Joint Detection and Classification of Singing Voice Melody Using
+Convolutional Recurrent Neural Networks" (2019)
+Link: https://www.semanticscholar.org/paper/Joint-Detection-and-Classification-of-Singing-Voice-Kum-Nam/60a2ad4c7db43bace75805054603747fcd062c0d
+"""
+import torch
+from torch import nn
+class JDCNet(nn.Module):
+    """
+    Joint Detection and Classification Network model for singing voice melody.
+    Only the classification branch is evaluated by the methods of this class;
+    the auxiliary max-pool layers, ``detector_conv``, ``bilstm_detector`` and
+    ``detector`` are constructed (so their parameters exist) but never called here.
+    """
+    def __init__(self, num_class=722, seq_len=31, leaky_relu_slope=0.01):
+        """
+        Args:
+            num_class: number of outputs of the per-frame classifier.
+            seq_len: not read by this constructor; the frame count is taken
+                from the input tensor at call time.
+            leaky_relu_slope: negative slope of the LeakyReLU layers built
+                directly here (the ResBlocks are built with their own default).
+        """
+        super().__init__()
+        self.num_class = num_class
+        # Shape comments use (b, C, frames, bins) for the tensor after the
+        # transpose applied in ``_pre_pool_features``. Each ResBlock halves
+        # ``bins`` and ``pool_block`` divides it by 4 again, so the 512-wide
+        # LSTM input (256 channels x 2 bins) needs 64 <= bins <= 95 at the input.
+        self.conv_block = nn.Sequential(
+            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1, bias=False),  # out: (b, 64, frames, bins)
+            nn.BatchNorm2d(num_features=64),
+            nn.LeakyReLU(leaky_relu_slope, inplace=True),
+            nn.Conv2d(64, 64, 3, padding=1, bias=False),  # (b, 64, frames, bins)
+        )
+        # res blocks
+        self.res_block1 = ResBlock(in_channels=64, out_channels=128)  # (b, 128, frames, bins // 2)
+        self.res_block2 = ResBlock(in_channels=128, out_channels=192)  # (b, 192, frames, bins // 4)
+        self.res_block3 = ResBlock(in_channels=192, out_channels=256)  # (b, 256, frames, bins // 8)
+        # pool block (the methods below index layers 0-2; the Dropout at
+        # index 3 is never applied by them)
+        self.pool_block = nn.Sequential(
+            nn.BatchNorm2d(num_features=256),
+            nn.LeakyReLU(leaky_relu_slope, inplace=True),
+            nn.MaxPool2d(kernel_size=(1, 4)),  # (b, 256, frames, bins // 32)
+            nn.Dropout(p=0.2),
+        )
+        # maxpool layers (for auxiliary network inputs); not called in this class
+        self.maxpool1 = nn.MaxPool2d(kernel_size=(1, 40))
+        self.maxpool2 = nn.MaxPool2d(kernel_size=(1, 20))
+        self.maxpool3 = nn.MaxPool2d(kernel_size=(1, 10))
+        # detector branch: 640 -> 256 channels; not called in this class
+        self.detector_conv = nn.Sequential(
+            nn.Conv2d(640, 256, 1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.LeakyReLU(leaky_relu_slope, inplace=True),
+            nn.Dropout(p=0.2),
+        )
+        # input: (b, frames, 512) - resized from (b, 256, frames, 2)
+        self.bilstm_classifier = nn.LSTM(
+            input_size=512, hidden_size=256,
+            batch_first=True, bidirectional=True)  # (b, frames, 512)
+        # input: (b, frames, 512); not called in this class
+        self.bilstm_detector = nn.LSTM(
+            input_size=512, hidden_size=256,
+            batch_first=True, bidirectional=True)  # (b, frames, 512)
+        # input: (b * frames, 512)
+        self.classifier = nn.Linear(in_features=512, out_features=self.num_class)  # (b * frames, num_class)
+        # input: (b * frames, 512); not called in this class
+        self.detector = nn.Linear(in_features=512, out_features=2)  # (b * frames, 2) - binary classifier
+        # initialize weights
+        self.apply(self.init_weights)
+    def _pre_pool_features(self, x):
+        """Shared trunk: transpose the last two axes, then run the conv block,
+        the three residual blocks and the BatchNorm + LeakyReLU of ``pool_block``."""
+        x = x.float().transpose(-1, -2)
+        out = self.conv_block(x)
+        out = self.res_block1(out)
+        out = self.res_block2(out)
+        out = self.res_block3(out)
+        out = self.pool_block[0](out)
+        return self.pool_block[1](out)
+    def get_feature_GAN(self, x):
+        """Return the trunk features (before max-pooling) with the last two
+        axes swapped back to the input orientation."""
+        return self._pre_pool_features(x).transpose(-1, -2)
+    def get_feature(self, x):
+        """Return the trunk features after the ``pool_block`` max-pool (no dropout)."""
+        return self.pool_block[2](self._pre_pool_features(x))
+    def forward(self, x):
+        """
+        Args:
+            x: input whose last axis is the frame axis; the last two axes are
+               swapped before the convolutions.
+        Returns:
+            (classification_prediction, GAN_feature, poolblock_out)
+            - classification_prediction: absolute value of the classifier
+              output of shape (b, frames, num_class), passed through
+              ``squeeze()`` (so every size-1 axis, including a batch of one,
+              is dropped)
+            - GAN_feature: the features returned by ``get_feature_GAN``
+            - poolblock_out: the features returned by ``get_feature``
+            The detector branch is not evaluated.
+        """
+        ###############################
+        # forward pass for classifier #
+        ###############################
+        seq_len = x.shape[-1]
+        features = self._pre_pool_features(x)
+        GAN_feature = features.transpose(-1, -2)
+        poolblock_out = self.pool_block[2](features)
+        # (b, 256, frames, 2) => (b, frames, 256, 2) => (b, frames, 512)
+        classifier_out = poolblock_out.permute(0, 2, 1, 3).contiguous().view((-1, seq_len, 512))
+        classifier_out, _ = self.bilstm_classifier(classifier_out)  # ignore the hidden states
+        classifier_out = classifier_out.contiguous().view((-1, 512))  # (b * frames, 512)
+        classifier_out = self.classifier(classifier_out)
+        classifier_out = classifier_out.view((-1, seq_len, self.num_class))  # (b, frames, num_class)
+        return torch.abs(classifier_out.squeeze()), GAN_feature, poolblock_out
+    @staticmethod
+    def init_weights(m):
+        """Per-module initializer used with ``nn.Module.apply``: Kaiming-uniform
+        weights and zero bias for Linear, Xavier-normal weights for Conv2d,
+        orthogonal (>= 2-D) or normal (1-D) parameters for LSTM / LSTMCell."""
+        if isinstance(m, nn.Linear):
+            nn.init.kaiming_uniform_(m.weight)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            nn.init.xavier_normal_(m.weight)
+        elif isinstance(m, nn.LSTM) or isinstance(m, nn.LSTMCell):
+            for p in m.parameters():
+                if p.data is None:
+                    continue
+                if len(p.shape) >= 2:
+                    nn.init.orthogonal_(p.data)
+                else:
+                    nn.init.normal_(p.data)
+class ResBlock(nn.Module):
+    """Residual block: BN / LeakyReLU / max-pool, then two 3x3 convolutions
+    added to a shortcut (1x1 convolution when the channel count changes,
+    identity otherwise)."""
+    def __init__(self, in_channels: int, out_channels: int, leaky_relu_slope=0.01):
+        super().__init__()
+        self.downsample = in_channels != out_channels
+        # BN / LReLU / MaxPool layer before the conv layer - see Figure 1b in the paper
+        pre_layers = [
+            nn.BatchNorm2d(num_features=in_channels),
+            nn.LeakyReLU(leaky_relu_slope, inplace=True),
+            nn.MaxPool2d(kernel_size=(1, 2)),  # halves the last axis only
+        ]
+        self.pre_conv = nn.Sequential(*pre_layers)
+        # conv layers
+        conv_layers = [
+            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
+                      kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(out_channels),
+            nn.LeakyReLU(leaky_relu_slope, inplace=True),
+            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
+        ]
+        self.conv = nn.Sequential(*conv_layers)
+        # 1 x 1 convolution layer to match the feature dimensions (None when
+        # the channel count is unchanged)
+        self.conv1by1 = (
+            nn.Conv2d(in_channels, out_channels, 1, bias=False)
+            if self.downsample
+            else None
+        )
+    def forward(self, x):
+        x = self.pre_conv(x)
+        main_path = self.conv(x)
+        shortcut = self.conv1by1(x) if self.downsample else x
+        return main_path + shortcut

Utils/PLBERT/config.yml ADDED Viewed

	@@ -0,0 +1,30 @@

+log_dir: "Checkpoint"
+mixed_precision: "fp16"
+data_folder: "wikipedia_20220301.en.processed"
+batch_size: 192
+save_interval: 5000
+log_interval: 10
+num_process: 1 # number of GPUs
+num_steps: 1000000
+dataset_params:
+    tokenizer: "transfo-xl-wt103"
+    token_separator: " " # token used for phoneme separator (space)
+    token_mask: "M" # token used for phoneme mask (M)
+    word_separator: 3039 # token used for word separator (<formula>)
+    token_maps: "token_maps.pkl" # token map path
+    max_mel_length: 512 # max phoneme length
+    word_mask_prob: 0.15 # probability to mask the entire word
+    phoneme_mask_prob: 0.1 # probability to mask each phoneme
+    replace_prob: 0.2 # probability to replace phonemes
+model_params:
+    vocab_size: 178
+    hidden_size: 768
+    num_attention_heads: 12
+    intermediate_size: 2048
+    max_position_embeddings: 512
+    num_hidden_layers: 12
+    dropout: 0.1

Utils/PLBERT/step_1000000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0714ff85804db43e06b3b0ac5749bf90cf206257c6c5916e8a98c5933b4c21e0
+size 25185187

Utils/PLBERT/util.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import os
+import yaml
+import torch
+from transformers import AlbertConfig, AlbertModel
+class CustomAlbert(AlbertModel):
+    """AlbertModel whose forward pass returns only ``last_hidden_state``."""
+    def forward(self, *args, **kwargs):
+        # Delegate to AlbertModel.forward, then unwrap the hidden states from
+        # the returned output object.
+        return super().forward(*args, **kwargs).last_hidden_state
+def load_plbert(log_dir):
+    """Build a ``CustomAlbert`` from ``<log_dir>/config.yml`` and load the
+    newest ``step_<N>.pth`` checkpoint found in ``log_dir``.
+    Args:
+        log_dir: directory holding ``config.yml`` and the ``step_*`` checkpoints.
+    Returns:
+        The ``CustomAlbert`` model with the checkpoint's ``encoder.*`` weights
+        loaded (non-strict).
+    Raises:
+        FileNotFoundError: if ``log_dir`` contains no ``step_*`` checkpoint file.
+    """
+    from collections import OrderedDict
+    config_path = os.path.join(log_dir, "config.yml")
+    # Context manager so the config file handle is closed deterministically
+    with open(config_path) as config_file:
+        plbert_config = yaml.safe_load(config_file)
+    albert_base_configuration = AlbertConfig(**plbert_config['model_params'])
+    bert = CustomAlbert(albert_base_configuration)
+    # Step numbers of every checkpoint file, e.g. "step_1000000.pth" -> 1000000
+    iters = [
+        int(f.split('_')[-1].split('.')[0])
+        for f in os.listdir(log_dir)
+        if f.startswith("step_") and os.path.isfile(os.path.join(log_dir, f))
+    ]
+    if not iters:
+        raise FileNotFoundError(f"No 'step_*' checkpoint found in '{log_dir}'.")
+    latest_step = max(iters)
+    # NOTE: torch.load unpickles the file; only point log_dir at trusted checkpoints.
+    checkpoint = torch.load(
+        os.path.join(log_dir, "step_" + str(latest_step) + ".pth"), map_location='cpu')
+    state_dict = checkpoint['net']
+    new_state_dict = OrderedDict()
+    for k, v in state_dict.items():
+        name = k[7:] # remove `module.`
+        if name.startswith('encoder.'):
+            new_state_dict[name[8:]] = v # remove `encoder.`
+    # Drop the position-id entry when the checkpoint has one; a checkpoint
+    # without it must not abort loading.
+    new_state_dict.pop("embeddings.position_ids", None)
+    bert.load_state_dict(new_state_dict, strict=False)
+    return bert

Utils/config.yml ADDED Viewed

	@@ -0,0 +1,21 @@

+{ASR_config: Utils/ASR/config.yml, ASR_path: Utils/ASR/epoch_00080.pth, F0_path: Utils/JDC/bst.t7,
+  PLBERT_dir: Utils/PLBERT/, batch_size: 8, data_params: {OOD_data: Data/OOD_texts.txt,
+    min_length: 50, root_path: '', train_data: Data/train_list.txt, val_data: Data/val_list.txt},
+  device: cuda, epochs_1st: 40, epochs_2nd: 25, first_stage_path: first_stage.pth,
+  load_only_params: false, log_dir: Models/LibriTTS, log_interval: 10, loss_params: {
+    TMA_epoch: 4, diff_epoch: 0, joint_epoch: 0, lambda_F0: 1.0, lambda_ce: 20.0,
+    lambda_diff: 1.0, lambda_dur: 1.0, lambda_gen: 1.0, lambda_mel: 5.0, lambda_mono: 1.0,
+    lambda_norm: 1.0, lambda_s2s: 1.0, lambda_slm: 1.0, lambda_sty: 1.0}, max_len: 300,
+  model_params: {decoder: {resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3,
+          5]], resblock_kernel_sizes: [3, 7, 11], type: hifigan, upsample_initial_channel: 512,
+      upsample_kernel_sizes: [20, 10, 6, 4], upsample_rates: [10, 5, 3, 2]}, diffusion: {
+      dist: {estimate_sigma_data: true, mean: -3.0, sigma_data: 0.19926648961191362,
+        std: 1.0}, embedding_mask_proba: 0.1, transformer: {head_features: 64, multiplier: 2,
+        num_heads: 8, num_layers: 3}}, dim_in: 64, dropout: 0.2, hidden_dim: 512,
+    max_conv_dim: 512, max_dur: 50, multispeaker: true, n_layer: 3, n_mels: 80, n_token: 178,
+    slm: {hidden: 768, initial_channel: 64, model: microsoft/wavlm-base-plus, nlayers: 13,
+      sr: 16000}, style_dim: 128}, optimizer_params: {bert_lr: 1.0e-05, ft_lr: 1.0e-05,
+    lr: 0.0001}, preprocess_params: {spect_params: {hop_length: 300, n_fft: 2048,
+      win_length: 1200}, sr: 24000}, pretrained_model: Models/LibriTTS/epoch_2nd_00002.pth,
+  save_freq: 1, second_stage_load_pretrained: true, slmadv_params: {batch_percentage: 0.5,
+    iter: 20, max_len: 500, min_len: 400, scale: 0.01, sig: 1.5, thresh: 5}}

annotated_features.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4d9a2bd9d3139441989c822d48fc96f887e0d3ea015d55dcb5df9b004836e4
+size 2576

app.py ADDED Viewed

	@@ -0,0 +1,506 @@

+#!/usr/bin/env python3
+import gradio as gr
+import numpy as np
+import torch
+import json
+import io
+import soundfile as sf
+from PIL import Image
+import matplotlib
+import joblib
+from sklearn.decomposition import PCA
+from collections import OrderedDict
+matplotlib.use("Agg")  # Use non-interactive backend
+import matplotlib.pyplot as plt
+from text2speech import tts_randomized, parse_speed, tts_with_style_vector
+# Constants and Paths
+# JSON file mapping a voice name to its style vector (read and written by the
+# helpers below)
+VOICES_JSON_PATH = "voices.json"
+PCA_MODEL_PATH = "pca_model.pkl"
+ANNOTATED_FEATURES_PATH = "annotated_features.npy"
+# Required length of a style vector (enforced when building and saving voices)
+VECTOR_DIMENSION = 256
+# One Voice Studio slider is created per name; also fixes the length of the
+# default slider values returned by update_sliders
+ANNOTATED_FEATURES_NAMES = ["Gender", "Tone", "Quality", "Enunciation", "Pace", "Style"]
+# Presumably the two poles of each feature above, in the same order; not
+# referenced by the code visible in this file section - TODO confirm usage
+ANNOTATED_FEATURES_INFO = [
+    "Male | Female",
+    "High | Low",
+    "Noisy | Clean",
+    "Clear | Unclear",
+    "Rapid | Slow",
+    "Colloquial | Formal",
+]
+# Load PCA model and annotated features
+# Both loads run at import time. Only a missing file is handled (the global is
+# set to None); any other load error propagates.
+# NOTE(review): joblib.load unpickles the file - keep pca_model.pkl trusted.
+try:
+    pca = joblib.load(PCA_MODEL_PATH)
+    print("PCA model loaded successfully.")
+except FileNotFoundError:
+    print(f"Error: PCA model file '{PCA_MODEL_PATH}' not found.")
+    pca = None
+try:
+    annotated_features = np.load(ANNOTATED_FEATURES_PATH)
+    print("Annotated features loaded successfully.")
+except FileNotFoundError:
+    print(f"Error: Annotated features file '{ANNOTATED_FEATURES_PATH}' not found.")
+    annotated_features = None
+# Utility Functions
+def load_voices_json():
+    """Read voices.json, keeping key order; an empty OrderedDict is returned
+    (after a warning) when the file is missing or is not valid JSON."""
+    voices = OrderedDict()
+    try:
+        with open(VOICES_JSON_PATH, "r") as voices_file:
+            voices = json.load(voices_file, object_pairs_hook=OrderedDict)
+    except FileNotFoundError:
+        print(f"Warning: {VOICES_JSON_PATH} not found. Creating a new one.")
+    except json.JSONDecodeError:
+        print(f"Warning: {VOICES_JSON_PATH} is not valid JSON.")
+    return voices
+def save_voices_json(data, path=VOICES_JSON_PATH):
+    """Save ``data`` to ``path`` as JSON indented by two spaces.
+    Raises:
+        TypeError / ValueError: if ``data`` cannot be serialized; in that case
+        the existing file is left untouched.
+    """
+    # Serialize first: opening with "w" truncates the file, so dumping straight
+    # into it would wipe voices.json whenever serialization fails midway.
+    serialized = json.dumps(data, indent=2)
+    with open(path, "w") as f:
+        f.write(serialized)
+    print(f"Voices saved to '{path}'.")
+def update_sliders(voice_name):
+    """
+    Compute slider values for the selected predefined voice by projecting its
+    style vector with the loaded PCA model.
+    Args:
+        voice_name (str): The name of the selected voice.
+    Returns:
+        list: PCA component values for the sliders, or one 0.0 per slider when
+        no voice is selected, the voice is unknown, the PCA model is missing
+        or the projection fails.
+    """
+    def neutral_values():
+        # Fresh list of default slider values (zeros), one per annotated feature
+        return [0.0] * len(ANNOTATED_FEATURES_NAMES)
+    if not voice_name:
+        return neutral_values()
+    voices_data = load_voices_json()
+    if voice_name not in voices_data:
+        print(f"Voice '{voice_name}' not found in {VOICES_JSON_PATH}.")
+        return neutral_values()
+    style_row = np.array(voices_data[voice_name], dtype=np.float32).reshape(1, -1)
+    if pca is None:
+        print("PCA model is not loaded.")
+        return neutral_values()
+    try:
+        # Transform the style vector into PCA component values
+        return pca.transform(style_row)[0].tolist()
+    except Exception as e:
+        print(f"Error transforming style vector to PCA components: {e}")
+        return neutral_values()
+def generate_audio_with_voice(text, voice_key, speed_val):
+    """
+    Generate audio using the style vector of the selected predefined voice.
+    Args:
+        text (str): The text to synthesize.
+        voice_key (str): The name of the selected voice.
+        speed_val (float): The speed multiplier.
+    Returns:
+        tuple: always a pair, so callers can unpack two values.
+            On success ``(audio_tuple, style_vector)`` where ``audio_tuple`` is
+            ``(sample_rate, waveform)`` and ``style_vector`` is a nested list.
+            On failure ``(None, error_message)``.
+    """
+    try:
+        # Load voices data
+        voices_data = load_voices_json()
+        if voice_key not in voices_data:
+            print(f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}.")
+            return None, "Selected voice not found."
+        # Retrieve the style vector for the selected voice
+        style_vector = np.array(voices_data[voice_key], dtype=np.float32).reshape(1, -1)
+        print(f"Selected Voice: {voice_key}")
+        print(f"Style Vector (First 6): {style_vector[0][:6]}")
+        # Convert to torch tensor
+        style_vec_torch = torch.from_numpy(style_vector).float()
+        # Generate audio using the TTS model
+        audio_np = tts_with_style_vector(
+            text,
+            style_vec=style_vec_torch,
+            speed=speed_val,
+            alpha=0.3,
+            beta=0.7,
+            diffusion_steps=7,
+            embedding_scale=1.0,
+        )
+        if audio_np is None:
+            print("Audio generation failed.")
+            return None, "Audio generation failed."
+        # Prepare audio for Gradio
+        sr = 24000  # Adjust based on your actual sampling rate
+        audio_tuple = (sr, audio_np)
+        return audio_tuple, style_vector.tolist()
+    except Exception as e:
+        # Best-effort UI helper: report the failure instead of raising
+        print(f"Error in generate_audio_with_voice: {e}")
+        return None, "An error occurred during audio generation."
+def build_modified_vector(voice_key, top6_values):
+    """Rebuild a style vector from PCA component values via inverse PCA.
+    The stored vector of ``voice_key`` is only validated (it must exist and
+    have VECTOR_DIMENSION entries); the result comes from ``top6_values`` alone.
+    Returns None when validation or the inverse transform fails.
+    """
+    voices_data = load_voices_json()
+    if voice_key not in voices_data:
+        print(f"Voice '{voice_key}' not found in {VOICES_JSON_PATH}.")
+        return None
+    base_vector = np.array(voices_data[voice_key], dtype=np.float32).squeeze()
+    has_expected_shape = base_vector.ndim == 1 and base_vector.shape[0] == VECTOR_DIMENSION
+    if not has_expected_shape:
+        print(f"Voice '{voice_key}' has invalid shape {base_vector.shape}. Expected (256,).")
+        return None
+    try:
+        # Reconstruct the style vector using inverse PCA
+        component_row = np.array(top6_values).reshape(1, -1)
+        return pca.inverse_transform(component_row)[0]
+    except Exception as e:
+        print(f"Error reconstructing style vector: {e}")
+        return None
+def reconstruct_style_vector(pca_components):
+    """
+    Map PCA component values back to a style vector with the loaded PCA model.
+    Returns None when the model is not loaded or the inverse transform fails.
+    """
+    if pca is None:
+        print("PCA model is not loaded.")
+        return None
+    try:
+        # The components are wrapped into a single-row batch; row 0 is the result
+        restored_rows = pca.inverse_transform([pca_components])
+        style_vector = restored_rows[0]
+    except Exception as e:
+        print(f"Error during inverse PCA transform: {e}")
+        return None
+    else:
+        return style_vector
+def generate_custom_audio(text, voice_key, randomize, speed_str, *slider_values):
+    """
+    Generate audio from a randomized or slider-defined style vector.
+    Args:
+        text: The text to synthesize.
+        voice_key: Name of the base voice (validated when not randomizing).
+        randomize: If true, use ``tts_randomized`` instead of the sliders.
+        speed_str: Speed value handed to ``parse_speed``.
+        *slider_values: PCA component values used for the inverse PCA.
+    Returns:
+        tuple: always a pair.
+            On success ``(audio_tuple, style_vector)`` where ``audio_tuple`` is
+            ``(sample_rate, waveform)`` for Gradio's Audio and ``style_vector``
+            is the final style vector as a list.
+            On failure ``(None, None)``.
+    """
+    try:
+        speed_val = parse_speed(speed_str)
+        print(f"Parsed speed: {speed_val}")
+        if randomize:
+            # Generate randomized style vector
+            audio_np, random_style_vec = tts_randomized(text, speed=speed_val)
+            if random_style_vec is None:
+                print("Failed to generate randomized style vector.")
+                return None, None
+            # Ensure the style vector is flat; detach/cpu so the conversion
+            # also works for tensors that track gradients or live on a GPU
+            final_vec = (
+                random_style_vec.detach().cpu().numpy().flatten()
+                if isinstance(random_style_vec, torch.Tensor)
+                else np.array(random_style_vec).flatten()
+            )
+            print("Randomized Style Vector (First 6):", final_vec[:6])
+        else:
+            # Reconstruct the style vector from slider values using inverse PCA
+            reconstructed_vec = build_modified_vector(voice_key, slider_values)
+            if reconstructed_vec is None:
+                print(
+                    "No reconstructed vector could be constructed, skipping audio generation."
+                )
+                return None, None
+            # Convert to torch tensor
+            style_vec_torch = torch.from_numpy(reconstructed_vec).float().unsqueeze(0)
+            # Generate audio with the reconstructed style vector
+            audio_np = tts_with_style_vector(
+                text,
+                style_vec=style_vec_torch,
+                speed=speed_val,
+                alpha=0.3,
+                beta=0.7,
+                diffusion_steps=7,
+                embedding_scale=1.0,
+            )
+            final_vec = reconstructed_vec
+            print("Reconstructed Style Vector (First 6):", final_vec[:6])
+        if audio_np is None:
+            print("Audio generation failed.")
+            return None, None
+        # Prepare audio for Gradio
+        sr = 24000  # Adjust based on your actual sampling rate
+        audio_tuple = (sr, audio_np)
+        return audio_tuple, final_vec.tolist()
+    except Exception as e:
+        # Best-effort UI helper: report the failure instead of raising
+        print(f"Error generating audio and style plot: {e}")
+        return None, None
+def save_style_to_json(style_data, style_name):
+    """Save ``style_data`` (list of floats) into voices.json under ``style_name``.
+    Returns:
+        str: a status message; nothing is written unless the name is non-blank
+        and unused and ``style_data`` has VECTOR_DIMENSION entries.
+    """
+    if not style_name.strip():
+        return "Please enter a new style name before saving."
+    # No vector has been produced yet (e.g. Save pressed before Generate);
+    # len(None) below would raise TypeError.
+    if style_data is None:
+        return "No style vector to save. Generate audio first."
+    voices_data = load_voices_json()
+    if style_name in voices_data:
+        return (
+            f"Style name '{style_name}' already exists. Please choose a different name."
+        )
+    # Ensure the style_data has the correct length
+    if len(style_data) != VECTOR_DIMENSION:
+        return f"Style vector length mismatch. Expected {VECTOR_DIMENSION}, got {len(style_data)}."
+    # Save the style vector
+    voices_data[style_name] = style_data
+    save_voices_json(voices_data)
+    return f"Saved style as '{style_name}' in {VOICES_JSON_PATH}."
+# Gradio Interface Functions
+def rearrange_voices(new_order):
+    """Rewrite voices.json in the order given by the comma-separated ``new_order``.
+    Only the listed voices are written back, so a voice that is not listed is
+    dropped from the file. Returns (status message, list of voice names).
+    """
+    voices_data = load_voices_json()
+    requested_names = [part.strip() for part in new_order.split(",")]
+    has_unknown_name = any(name not in voices_data for name in requested_names)
+    if has_unknown_name:
+        return "Error: New order contains invalid voice names.", list(
+            voices_data.keys()
+        )
+    ordered_data = OrderedDict(
+        (name, voices_data[name]) for name in requested_names
+    )
+    save_voices_json(ordered_data)
+    print(f"Voices rearranged: {list(ordered_data.keys())}")
+    return "Voices rearranged successfully.", list(ordered_data.keys())
+def delete_voice(selected):
+    """Remove every voice named in ``selected`` from voices.json.
+    Returns (status message, list of the remaining voice names).
+    """
+    if not selected:
+        return "No voices selected for deletion.", list(load_voices_json().keys())
+    voices_data = load_voices_json()
+    for voice_name in selected:
+        if voice_name not in voices_data:
+            # Unknown names are skipped silently
+            continue
+        voices_data.pop(voice_name)
+        print(f"Voice '{voice_name}' deleted.")
+    save_voices_json(voices_data)
+    return "Deleted selected voices successfully.", list(voices_data.keys())
+def upload_new_voices(uploaded_file):
+    """Merge the voices of an uploaded JSON file into voices.json.
+    The upload must parse to a dictionary; its entries overwrite stored voices
+    of the same name. Returns (status message, list of voice names).
+    """
+    if uploaded_file is None:
+        return "No file uploaded.", list(load_voices_json().keys())
+    try:
+        incoming = json.load(uploaded_file)
+        if isinstance(incoming, dict):
+            voices_data = load_voices_json()
+            voices_data.update(incoming)
+            save_voices_json(voices_data)
+            print(f"Voices uploaded: {list(incoming.keys())}")
+            return "Voices uploaded successfully.", list(voices_data.keys())
+        return "Invalid JSON format. Expected a dictionary of voices.", list(
+            load_voices_json().keys()
+        )
+    except json.JSONDecodeError:
+        return "Uploaded file is not valid JSON.", list(load_voices_json().keys())
+# Create Gradio Interface with Tabs
+def create_combined_interface():
+    """Build the Gradio Blocks app with the "Text-to-Speech" and "Voice Studio" tabs.
+    Returns:
+        The ``gr.Blocks`` instance, not yet launched.
+    """
+    voices_data = load_voices_json()
+    voice_choices = list(voices_data.keys())
+    default_voice = voice_choices[0] if voice_choices else None
+    css = """
+    h4 {
+        text-align: center;
+        display:block;
+    }
+    """
+    with gr.Blocks(theme=gr.themes.Ocean(), css=css) as demo:
+        gr.Markdown("# StyleTTS2 Studio - Build custom voices")
+        # ----------- Text-to-Speech Tab -----------
+        with gr.Tab("Text-to-Speech"):
+            gr.Markdown("### Generate Speech with Predefined Voices")
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Text to Synthesize",
+                    value="Hello world from the Gradio + TTS pipeline!",
+                    lines=3,
+                )
+                voice_dropdown = gr.Dropdown(
+                    choices=voice_choices,
+                    label="Select Base Voice",
+                    value=default_voice,
+                    interactive=True,
+                )
+                speed_slider = gr.Slider(
+                    minimum=50,
+                    maximum=200,
+                    step=1,
+                    label="Speed (%)",
+                    value=100,
+                )
+                with gr.Row():
+                    generate_btn = gr.Button("Generate Audio")
+            audio_output = gr.Audio(label="Synthesized Audio")
+            # Generate button functionality
+            def on_generate_tts(text, voice, speed):
+                if not voice:
+                    return None, "No voice selected."
+                speed_val = speed / 100  # Convert percentage to multiplier
+                result = generate_audio_with_voice(text, voice, speed_val)
+                # Index instead of unpacking: the helper's result is read by
+                # position so a failure tuple of any length cannot raise here.
+                # On failure its last element is the error message.
+                audio = result[0]
+                if audio is None:
+                    return None, result[-1]
+                return audio, "Audio generated successfully."
+            generate_btn.click(
+                fn=on_generate_tts,
+                inputs=[text_input, voice_dropdown, speed_slider],
+                outputs=[audio_output, gr.Textbox(label="Status", visible=False)],
+            )
+        # ----------- Voice Studio Tab -----------
+        with gr.Tab("Voice Studio"):
+            gr.Markdown("### Customize and Create New Voices")
+            with gr.Column():
+                text_input_studio = gr.Textbox(
+                    label="Text to Synthesize",
+                    value="Customize your voice here!",
+                    lines=3,
+                )
+                voice_dropdown_studio = gr.Dropdown(
+                    choices=voice_choices,
+                    label="Select Base Voice",
+                    value=default_voice,
+                )
+                speed_slider_studio = gr.Slider(
+                    minimum=50,
+                    maximum=200,
+                    step=1,
+                    label="Speed (%)",
+                    value=100,
+                )
+                # Sliders for PCA components (6 sliders)
+                pca_sliders = [
+                    gr.Slider(
+                        minimum=-2.0,
+                        maximum=2.0,
+                        value=0.0,
+                        step=0.1,
+                        label=feature,
+                    )
+                    for feature in ANNOTATED_FEATURES_NAMES
+                ]
+            generate_btn_studio = gr.Button("Generate Customized Audio")
+            audio_output_studio = gr.Audio(label="Customized Synthesized Audio")
+            new_style_name = gr.Textbox(label="New Style Name", value="")
+            save_btn_studio = gr.Button("Save Customized Voice")
+            status_text = gr.Textbox(label="Status", visible=True)
+            # State to hold the last style vector
+            style_vector_state_studio = gr.State()
+            # Generate button functionality
+            def on_generate_studio(text, voice, speed, *pca_values):
+                if not voice:
+                    return None, "No voice selected.", None
+                speed_val = speed / 100  # Convert percentage to multiplier
+                # NOTE(review): generate_custom_audio passes this value to
+                # parse_speed - confirm it accepts a float multiplier.
+                result = generate_custom_audio(
+                    text, voice, False, speed_val, *pca_values
+                )
+                # A failed generation comes back as a tuple of Nones, not None
+                if not result or result[0] is None:
+                    return None, "Failed to generate audio.", None
+                audio_tuple, style_vector = result[0], result[-1]
+                # The style vector reaches the State through the outputs list;
+                # assigning to the component's ``value`` here would change the
+                # shared default instead of this session's state.
+                return audio_tuple, "Audio generated successfully.", style_vector
+            generate_btn_studio.click(
+                fn=on_generate_studio,
+                inputs=[text_input_studio, voice_dropdown_studio, speed_slider_studio]
+                + pca_sliders,
+                outputs=[audio_output_studio, status_text, style_vector_state_studio],
+            )
+            # Save button functionality
+            def on_save_style_studio(style_vector, style_name):
+                if not style_name:
+                    result = "Please enter a name for the new voice!"
+                elif style_vector is None:
+                    result = "Generate audio before saving a voice."
+                else:
+                    result = save_style_to_json(style_vector, style_name)
+                new_choices = list(load_voices_json().keys())
+                # Always return one value per output component: both dropdowns
+                # and the status message
+                return (
+                    gr.Dropdown(choices=new_choices),  # Update first dropdown
+                    gr.Dropdown(choices=new_choices),  # Update studio dropdown
+                    result,  # Status message
+                )
+            save_btn_studio.click(
+                fn=on_save_style_studio,
+                inputs=[style_vector_state_studio, new_style_name],
+                outputs=[voice_dropdown, voice_dropdown_studio, status_text],
+            )
+            # Add callback to update sliders when a voice is selected
+            voice_dropdown_studio.change(
+                fn=update_sliders,
+                inputs=voice_dropdown_studio,
+                outputs=pca_sliders,
+            )
+        gr.Markdown(
+            "#### Based on [StyleTTS2](https://github.com/yl4579/StyleTTS2) and [artificial StyleTTS2](https://huggingface.co/dkounadis/artificial-styletts2/tree/main)"
+        )
+    return demo
+if __name__ == "__main__":
+    try:
+        interface = create_combined_interface()
+        interface.launch(share=False)
+    except Exception as e:
+        print(f"An error occurred while launching the interface: {e}")

espeak_util.py ADDED Viewed

	@@ -0,0 +1,206 @@

+import platform
+import subprocess
+import shutil
+from pathlib import Path
+import os
+from typing import Optional, Tuple
+from phonemizer.backend.espeak.wrapper import EspeakWrapper
+class EspeakConfig:
+    """Utility class for configuring espeak-ng library and binary."""
+    @staticmethod
+    def find_espeak_binary() -> tuple[bool, Optional[str]]:
+        """
+        Find espeak-ng binary using multiple methods.
+        Returns:
+            tuple: (bool indicating if espeak is available, path to espeak binary if found)
+        """
+        # Common binary names
+        binary_names = ["espeak-ng", "espeak"]
+        if platform.system() == "Windows":
+            binary_names = ["espeak-ng.exe", "espeak.exe"]
+        # Common installation directories for Linux
+        linux_paths = [
+            "/usr/bin",
+            "/usr/local/bin",
+            "/usr/lib/espeak-ng",
+            "/usr/local/lib/espeak-ng",
+            "/opt/espeak-ng/bin",
+        ]
+        # First check if it's in PATH
+        for name in binary_names:
+            espeak_path = shutil.which(name)
+            if espeak_path:
+                return True, espeak_path
+        # For Linux, check common installation directories
+        if platform.system() == "Linux":
+            for directory in linux_paths:
+                for name in binary_names:
+                    path = Path(directory) / name
+                    if path.exists():
+                        return True, str(path)
+        # Try running the command directly as a last resort
+        try:
+            subprocess.run(
+                ["espeak-ng", "--version"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                check=True,
+            )
+            return True, "espeak-ng"
+        except (subprocess.SubprocessError, FileNotFoundError):
+            pass
+        return False, None
+    @staticmethod
+    def find_library_path() -> Optional[str]:
+        """
+        Find the espeak-ng library using multiple search methods.
+        Returns:
+            Optional[str]: Path to the library if found, None otherwise
+        """
+        system = platform.system()
+        if system == "Linux":
+            lib_names = ["libespeak-ng.so", "libespeak-ng.so.1"]
+            common_paths = [
+                # Debian/Ubuntu paths
+                "/usr/lib/x86_64-linux-gnu",
+                "/usr/lib/aarch64-linux-gnu",  # For ARM64
+                "/usr/lib/arm-linux-gnueabihf",  # For ARM32
+                "/usr/lib",
+                "/usr/local/lib",
+                # Fedora/RHEL paths
+                "/usr/lib64",
+                "/usr/lib32",
+                # Common additional paths
+                "/usr/lib/espeak-ng",
+                "/usr/local/lib/espeak-ng",
+                "/opt/espeak-ng/lib",
+            ]
+            # Check common locations first
+            for path in common_paths:
+                for lib_name in lib_names:
+                    lib_path = Path(path) / lib_name
+                    if lib_path.exists():
+                        return str(lib_path)
+            # Search system library paths
+            try:
+                # Use ldconfig to find the library
+                result = subprocess.run(
+                    ["ldconfig", "-p"], capture_output=True, text=True, check=True
+                )
+                for line in result.stdout.splitlines():
+                    if "libespeak-ng.so" in line:
+                        # Extract path from ldconfig output
+                        return line.split("=>")[-1].strip()
+            except (subprocess.SubprocessError, FileNotFoundError):
+                pass
+        elif system == "Darwin":  # macOS
+            common_paths = [
+                Path("/opt/homebrew/lib/libespeak-ng.dylib"),
+                Path("/usr/local/lib/libespeak-ng.dylib"),
+                *list(
+                    Path("/opt/homebrew/Cellar/espeak-ng").glob(
+                        "*/lib/libespeak-ng.dylib"
+                    )
+                ),
+                *list(
+                    Path("/usr/local/Cellar/espeak-ng").glob("*/lib/libespeak-ng.dylib")
+                ),
+            ]
+            for path in common_paths:
+                if path.exists():
+                    return str(path)
+        elif system == "Windows":
+            common_paths = [
+                Path(os.environ.get("PROGRAMFILES", "C:\\Program Files"))
+                / "eSpeak NG"
+                / "libespeak-ng.dll",
+                Path(os.environ.get("PROGRAMFILES(X86)", "C:\\Program Files (x86)"))
+                / "eSpeak NG"
+                / "libespeak-ng.dll",
+                *[
+                    Path(p) / "libespeak-ng.dll"
+                    for p in os.environ.get("PATH", "").split(os.pathsep)
+                ],
+            ]
+            for path in common_paths:
+                if path.exists():
+                    return str(path)
+        return None
+    @classmethod
+    def configure_espeak(cls) -> Tuple[bool, str]:
+        """
+        Configure espeak-ng for use with the phonemizer.
+        Returns:
+            Tuple[bool, str]: (Success status, Status message)
+        """
+        # First check if espeak binary is available
+        espeak_available, espeak_path = cls.find_espeak_binary()
+        if not espeak_available:
+            raise FileNotFoundError(
+                "Could not find espeak-ng binary. Please install espeak-ng:\n"
+                "Ubuntu/Debian: sudo apt-get install espeak-ng espeak-ng-data\n"
+                "Fedora: sudo dnf install espeak-ng\n"
+                "Arch: sudo pacman -S espeak-ng\n"
+                "MacOS: brew install espeak-ng\n"
+                "Windows: Download from https://github.com/espeak-ng/espeak-ng/releases"
+            )
+        # Find the library
+        library_path = cls.find_library_path()
+        if not library_path:
+            # On Linux, we might not need to explicitly set the library path
+            if platform.system() == "Linux":
+                return True, f"Using system espeak-ng installation at: {espeak_path}"
+            else:
+                raise FileNotFoundError(
+                    "Could not find espeak-ng library. Please ensure espeak-ng is properly installed."
+                )
+        # Try to set the library path
+        try:
+            EspeakWrapper.set_library(library_path)
+            return True, f"Successfully configured espeak-ng library at: {library_path}"
+        except Exception as e:
+            if platform.system() == "Linux":
+                # On Linux, try to continue without explicit library path
+                return True, f"Using system espeak-ng installation at: {espeak_path}"
+            else:
+                raise RuntimeError(f"Failed to configure espeak-ng library: {str(e)}")
+def setup_espeak():
+    """
+    Set up espeak-ng for use with the phonemizer.
+    Raises appropriate exceptions if setup fails.
+    """
+    try:
+        success, message = EspeakConfig.configure_espeak()
+        print(message)
+    except Exception as e:
+        print(f"Error configuring espeak-ng: {str(e)}")
+        raise
+# Backwards-compatible alias: inference.py imports `set_espeak_library` from
+# this module, so the old name is kept and bound to setup_espeak.
+set_espeak_library = setup_espeak

inference.py ADDED Viewed

	@@ -0,0 +1,315 @@

+import yaml
+import random
+import librosa
+import numpy as np
+import phonemizer
+import torch
+import torchaudio
+from collections import OrderedDict
+from munch import Munch
+from nltk.tokenize import word_tokenize
+from cached_path import cached_path
+# Local or project imports
+from models import *
+from espeak_util import set_espeak_library
+from Utils.PLBERT.util import load_plbert
+from Modules.diffusion.sampler import DiffusionSampler, ADPM2Sampler, KarrasSchedule
+# -----------------------------------------------------------------------------
+# SEEDS AND DETERMINISM
+# -----------------------------------------------------------------------------
+random.seed(0)
+np.random.seed(0)
+torch.manual_seed(0)
+torch.backends.cudnn.benchmark = False
+torch.backends.cudnn.deterministic = True
+# -----------------------------------------------------------------------------
+# CONSTANTS / CHARACTERS
+# -----------------------------------------------------------------------------
+_pad = "$"
+_punctuation = ';:,.!?¡¿—…"«»“” '
+_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"
+symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)
+dicts = {symbols[i]: i for i in range(len(symbols))}
+# -----------------------------------------------------------------------------
+# TEXT CLEANER
+# -----------------------------------------------------------------------------
+class TextCleaner:
+    """
+    Maps individual characters to their corresponding indices.
+    If an unknown character is found, it prints a warning.
+    """
+    def __init__(self, dummy=None):
+        self.word_index_dictionary = dicts
+        print(len(dicts))
+    def __call__(self, text):
+        indexes = []
+        for char in text:
+            try:
+                indexes.append(self.word_index_dictionary[char])
+            except KeyError:
+                print("CLEAN", text)
+        return indexes
+textclenaer = TextCleaner()
+# -----------------------------------------------------------------------------
+# AUDIO PROCESSING
+# -----------------------------------------------------------------------------
+to_mel = torchaudio.transforms.MelSpectrogram(
+    n_mels=80, n_fft=2048, win_length=1200, hop_length=300
+)
+mean, std = -4, 4
+def preprocess(wave: np.ndarray) -> torch.Tensor:
+    """
+    Convert a NumPy audio array into a normalized mel spectrogram tensor.
+    """
+    wave_tensor = torch.from_numpy(wave).float()
+    mel_tensor = to_mel(wave_tensor)
+    mel_tensor = (torch.log(1e-5 + mel_tensor.unsqueeze(0)) - mean) / std
+    return mel_tensor
+def length_to_mask(lengths: torch.Tensor) -> torch.Tensor:
+    """
+    Return a boolean mask based on the lengths of each item in the batch.
+    """
+    max_len = lengths.max()
+    mask = (
+        torch.arange(max_len).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
+    )
+    mask = torch.gt(mask + 1, lengths.unsqueeze(1))
+    return mask
+# -----------------------------------------------------------------------------
+# MISC UTILS
+# -----------------------------------------------------------------------------
+def recursive_munch(d):
+    """
+    Recursively convert dictionaries to Munch objects.
+    """
+    if isinstance(d, dict):
+        return Munch((k, recursive_munch(v)) for k, v in d.items())
+    elif isinstance(d, list):
+        return [recursive_munch(v) for v in d]
+    else:
+        return d
+def compute_style(path: str) -> torch.Tensor:
+    """
+    Load an audio file, trim it, resample if needed, then
+    compute and return a style vector by passing through the style encoder
+    and predictor encoder.
+    """
+    wave, sr = librosa.load(path, sr=24000)
+    audio, _ = librosa.effects.trim(wave, top_db=30)
+    if sr != 24000:
+        audio = librosa.resample(audio, sr, 24000)
+    mel_tensor = preprocess(audio).to(device)
+    with torch.no_grad():
+        ref_s = model.style_encoder(mel_tensor.unsqueeze(1))
+        ref_p = model.predictor_encoder(mel_tensor.unsqueeze(1))
+    return torch.cat([ref_s, ref_p], dim=1)
+# -----------------------------------------------------------------------------
+# DEVICE SELECTION
+# -----------------------------------------------------------------------------
+device = "cpu"
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    # Optionally enable MPS if appropriate (commented out by default).
+    # device = "mps"
+    pass
+# -----------------------------------------------------------------------------
+# PHONEMIZER INITIALIZATION
+# -----------------------------------------------------------------------------
+set_espeak_library()
+global_phonemizer = phonemizer.backend.EspeakBackend(
+    language="en-us", preserve_punctuation=True, with_stress=True
+)
+# -----------------------------------------------------------------------------
+# LOAD CONFIG
+# -----------------------------------------------------------------------------
+config = yaml.safe_load(open("Utils/config.yml"))
+# -----------------------------------------------------------------------------
+# LOAD MODELS
+# -----------------------------------------------------------------------------
+ASR_config = config.get("ASR_config", False)
+ASR_path = config.get("ASR_path", False)
+text_aligner = load_ASR_models(ASR_path, ASR_config)
+F0_path = config.get("F0_path", False)
+pitch_extractor = load_F0_models(F0_path)
+from Utils.PLBERT.util import load_plbert
+BERT_path = config.get("PLBERT_dir", False)
+plbert = load_plbert(BERT_path)
+model_params = recursive_munch(config["model_params"])
+model = build_model(model_params, text_aligner, pitch_extractor, plbert)
+_ = [model[key].eval() for key in model]
+_ = [model[key].to(device) for key in model]
+params_whole = torch.load(
+    str(
+        cached_path(
+            "hf://yl4579/StyleTTS2-LibriTTS/Models/LibriTTS/epochs_2nd_00020.pth"
+        )
+    ),
+    map_location="cpu",
+)
+params = params_whole["net"]
+# Load model states
+for key in model:
+    if key in params:
+        print(f"{key} loaded")
+        try:
+            model[key].load_state_dict(params[key])
+        except RuntimeError:
+            state_dict = params[key]
+            new_state_dict = OrderedDict()
+            for k, v in state_dict.items():
+                name = k[7:]  # remove `module.`
+                new_state_dict[name] = v
+            model[key].load_state_dict(new_state_dict, strict=False)
+_ = [model[key].eval() for key in model]
+sampler = DiffusionSampler(
+    model.diffusion.diffusion,
+    sampler=ADPM2Sampler(),
+    sigma_schedule=KarrasSchedule(sigma_min=0.0001, sigma_max=3.0, rho=9.0),
+    clamp=False,
+)
+# -----------------------------------------------------------------------------
+# INFERENCE
+# -----------------------------------------------------------------------------
+def inference(
+    text: str,
+    ref_s: torch.Tensor,
+    alpha: float = 0.3,
+    beta: float = 0.7,
+    diffusion_steps: int = 5,
+    embedding_scale: float = 1,
+    speed: float = 1.2,
+):
+    """
+    Perform TTS inference using StyleTTS2 architecture.
+    Args:
+        text (str): The input text to be synthesized.
+        ref_s (torch.Tensor): The reference style/predictor embedding; its
+            first 128 columns and the remaining columns are used separately.
+        alpha (float): Weight of the sampled style on the first 128 dims
+            (the part passed to the decoder); 1 - alpha goes to ref_s.
+        beta (float): Weight of the sampled style on dims 128 onward
+            (the part passed to the predictor); 1 - beta goes to ref_s.
+        diffusion_steps (int): Number of diffusion steps.
+        embedding_scale (float): Forwarded to the diffusion sampler as
+            `embedding_scale`.
+        speed (float): Predicted durations are divided by this factor,
+            e.g. 1.2 will speed up the audio by 20%
+    Returns:
+        np.ndarray: Audio waveform (synthesized speech) with the last 50
+        samples removed.
+    """
+    text = text.strip()
+    # Phonemize, then re-join the tokenized phoneme string with single spaces
+    ps = global_phonemizer.phonemize([text])
+    ps = word_tokenize(ps[0])
+    ps = " ".join(ps)
+    tokens = textclenaer(ps)
+    tokens.insert(0, 0)  # Insert padding index at the start
+    tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
+    with torch.no_grad():
+        input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
+        text_mask = length_to_mask(input_lengths).to(device)
+        # Text encoder
+        t_en = model.text_encoder(tokens, input_lengths, text_mask)
+        # BERT duration encoding
+        bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
+        d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+        # Sample a style vector with the diffusion sampler, conditioned on the
+        # BERT output and the reference style
+        noise = torch.randn((1, 256)).unsqueeze(1).to(device)
+        s_pred = sampler(
+            noise=noise,
+            embedding=bert_dur,
+            embedding_scale=embedding_scale,
+            features=ref_s,
+            num_steps=diffusion_steps,
+        ).squeeze(1)
+        # Split the style vector: columns 128+ feed the predictor (s_style),
+        # columns 0-127 feed the decoder (s_ref)
+        s_style = s_pred[:, 128:]
+        s_ref = s_pred[:, :128]
+        # Interpolate with ref_s
+        s_ref = alpha * s_ref + (1 - alpha) * ref_s[:, :128]
+        s_style = beta * s_style + (1 - beta) * ref_s[:, 128:]
+        # Predictor
+        d = model.predictor.text_encoder(d_en, s_style, input_lengths, text_mask)
+        x, _ = model.predictor.lstm(d)
+        duration = model.predictor.duration_proj(x)
+        duration = torch.sigmoid(duration).sum(axis=-1)
+        duration = duration / speed  # change speed
+        # Create alignment: one row per token, with a run of ones covering the
+        # frames assigned to that token (at least one frame each)
+        pred_dur = torch.round(duration.squeeze()).clamp(min=1)
+        pred_aln_trg = torch.zeros(input_lengths, int(pred_dur.sum().data))
+        c_frame = 0
+        for i in range(pred_aln_trg.size(0)):
+            pd = int(pred_dur[i].data)
+            pred_aln_trg[i, c_frame : c_frame + pd] = 1
+            c_frame += pd
+        # Encode prosody
+        en = d.transpose(-1, -2) @ pred_aln_trg.unsqueeze(0).to(device)
+        if model_params.decoder.type == "hifigan":
+            # Shift by one frame along the last axis, repeating the first frame
+            asr_new = torch.zeros_like(en)
+            asr_new[:, :, 0] = en[:, :, 0]
+            asr_new[:, :, 1:] = en[:, :, 0:-1]
+            en = asr_new
+        F0_pred, N_pred = model.predictor.F0Ntrain(en, s_style)
+        # ASR-based encoding
+        asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
+        if model_params.decoder.type == "hifigan":
+            # Same one-frame shift as applied to `en` above
+            asr_new = torch.zeros_like(asr)
+            asr_new[:, :, 0] = asr[:, :, 0]
+            asr_new[:, :, 1:] = asr[:, :, 0:-1]
+            asr = asr_new
+        out = model.decoder(asr, F0_pred, N_pred, s_ref.squeeze().unsqueeze(0))
+    # Return waveform without the last 50 samples (as per original code)
+    return out.squeeze().cpu().numpy()[..., :-50]

models.py ADDED Viewed

	@@ -0,0 +1,611 @@

+#coding:utf-8
+import os
+import os.path as osp
+import copy
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
+from Utils.ASR.models import ASRCNN
+from Utils.JDC.model import JDCNet
+from Modules.diffusion.sampler import KDiffusion, LogNormalDistribution
+from Modules.diffusion.modules import Transformer1d, StyleTransformer1d
+from Modules.diffusion.diffusion import AudioDiffusionConditional
+from munch import Munch
+import yaml
+class LearnedDownSample(nn.Module):
+    """Learned downsampling layer built from a depthwise Conv2d.
+
+    `layer_type` selects the variant: 'none' (identity), 'timepreserve'
+    (stride 2 on the first spatial axis only) or 'half' (stride 2 on both
+    spatial axes). The convolutions use groups=dim_in and are wrapped in
+    spectral_norm. Any other `layer_type` raises RuntimeError.
+    """
+    def __init__(self, layer_type, dim_in):
+        super().__init__()
+        self.layer_type = layer_type
+        if self.layer_type == 'none':
+            self.conv = nn.Identity()
+        elif self.layer_type == 'timepreserve':
+            self.conv = spectral_norm(nn.Conv2d(dim_in, dim_in, kernel_size=(3, 1), stride=(2, 1), groups=dim_in, padding=(1, 0)))
+        elif self.layer_type == 'half':
+            self.conv = spectral_norm(nn.Conv2d(dim_in, dim_in, kernel_size=(3, 3), stride=(2, 2), groups=dim_in, padding=1))
+        else:
+            raise RuntimeError('Got unexpected donwsampletype %s, expected is [none, timepreserve, half]' % self.layer_type)
+    def forward(self, x):
+        """Apply the configured (or identity) convolution to `x`."""
+        return self.conv(x)
+class LearnedUpSample(nn.Module):
+    """Learned upsampling layer built from a depthwise ConvTranspose2d.
+
+    `layer_type` selects the variant: 'none' (identity), 'timepreserve'
+    (stride 2 on the first spatial axis only) or 'half' (stride 2 on both
+    spatial axes). Any other `layer_type` raises RuntimeError.
+    """
+    def __init__(self, layer_type, dim_in):
+        super().__init__()
+        self.layer_type = layer_type
+        if self.layer_type == 'none':
+            self.conv = nn.Identity()
+        elif self.layer_type == 'timepreserve':
+            self.conv = nn.ConvTranspose2d(dim_in, dim_in, kernel_size=(3, 1), stride=(2, 1), groups=dim_in, output_padding=(1, 0), padding=(1, 0))
+        elif self.layer_type == 'half':
+            self.conv = nn.ConvTranspose2d(dim_in, dim_in, kernel_size=(3, 3), stride=(2, 2), groups=dim_in, output_padding=1, padding=1)
+        else:
+            raise RuntimeError('Got unexpected upsampletype %s, expected is [none, timepreserve, half]' % self.layer_type)
+    def forward(self, x):
+        """Apply the configured (or identity) transposed convolution to `x`."""
+        return self.conv(x)
+class DownSample(nn.Module):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        elif self.layer_type == 'timepreserve':
+            return F.avg_pool2d(x, (2, 1))
+        elif self.layer_type == 'half':
+            if x.shape[-1] % 2 != 0:
+                x = torch.cat([x, x[..., -1].unsqueeze(-1)], dim=-1)
+            return F.avg_pool2d(x, 2)
+        else:
+            raise RuntimeError('Got unexpected donwsampletype %s, expected is [none, timepreserve, half]' % self.layer_type)
+class UpSample(nn.Module):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        elif self.layer_type == 'timepreserve':
+            return F.interpolate(x, scale_factor=(2, 1), mode='nearest')
+        elif self.layer_type == 'half':
+            return F.interpolate(x, scale_factor=2, mode='nearest')
+        else:
+            raise RuntimeError('Got unexpected upsampletype %s, expected is [none, timepreserve, half]' % self.layer_type)
+class ResBlk(nn.Module):
+    """2-D residual block with optional instance norm and downsampling.
+
+    The shortcut path applies an optional 1x1 conv (when dim_in != dim_out)
+    followed by pooling-based DownSample; the residual path applies two 3x3
+    convs with a LearnedDownSample in between. The two paths are summed and
+    divided by sqrt(2).
+    """
+    def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2),
+                 normalize=False, downsample='none'):
+        super().__init__()
+        self.actv = actv
+        self.normalize = normalize
+        self.downsample = DownSample(downsample)
+        self.downsample_res = LearnedDownSample(downsample, dim_in)
+        # A learned shortcut is only needed when the channel count changes
+        self.learned_sc = dim_in != dim_out
+        self._build_weights(dim_in, dim_out)
+    def _build_weights(self, dim_in, dim_out):
+        """Create the convolutions and, if enabled, the instance norms."""
+        self.conv1 = spectral_norm(nn.Conv2d(dim_in, dim_in, 3, 1, 1))
+        self.conv2 = spectral_norm(nn.Conv2d(dim_in, dim_out, 3, 1, 1))
+        if self.normalize:
+            # Both norms see dim_in channels: conv1 keeps the channel count
+            self.norm1 = nn.InstanceNorm2d(dim_in, affine=True)
+            self.norm2 = nn.InstanceNorm2d(dim_in, affine=True)
+        if self.learned_sc:
+            self.conv1x1 = spectral_norm(nn.Conv2d(dim_in, dim_out, 1, 1, 0, bias=False))
+    def _shortcut(self, x):
+        """Shortcut path: optional 1x1 conv, then pooling-based downsample."""
+        if self.learned_sc:
+            x = self.conv1x1(x)
+        # self.downsample is an nn.Module, so this test is always true; the
+        # 'none' case is handled inside DownSample.forward.
+        if self.downsample:
+            x = self.downsample(x)
+        return x
+    def _residual(self, x):
+        """Residual path: (norm) -> act -> conv1 -> learned downsample -> (norm) -> act -> conv2."""
+        if self.normalize:
+            x = self.norm1(x)
+        x = self.actv(x)
+        x = self.conv1(x)
+        x = self.downsample_res(x)
+        if self.normalize:
+            x = self.norm2(x)
+        x = self.actv(x)
+        x = self.conv2(x)
+        return x
+    def forward(self, x):
+        x = self._shortcut(x) + self._residual(x)
+        return x / math.sqrt(2)  # unit variance
+class StyleEncoder(nn.Module):
+    """Convolutional encoder that maps a single-channel 2-D input to a style vector.
+
+    The shared trunk is a 3x3 conv, four ResBlk stages with 'half'
+    downsampling (channels double each stage, capped at max_conv_dim), a
+    5x5 conv without padding and global average pooling. A final linear
+    layer projects to `style_dim`.
+    """
+    def __init__(self, dim_in=48, style_dim=48, max_conv_dim=384):
+        super().__init__()
+        blocks = []
+        blocks += [spectral_norm(nn.Conv2d(1, dim_in, 3, 1, 1))]
+        repeat_num = 4
+        for _ in range(repeat_num):
+            dim_out = min(dim_in*2, max_conv_dim)
+            blocks += [ResBlk(dim_in, dim_out, downsample='half')]
+            dim_in = dim_out
+        blocks += [nn.LeakyReLU(0.2)]
+        # NOTE(review): the unpadded 5x5 conv presumably requires the input to
+        # still be at least 5x5 after the four halvings; confirm with callers.
+        blocks += [spectral_norm(nn.Conv2d(dim_out, dim_out, 5, 1, 0))]
+        blocks += [nn.AdaptiveAvgPool2d(1)]
+        blocks += [nn.LeakyReLU(0.2)]
+        self.shared = nn.Sequential(*blocks)
+        self.unshared = nn.Linear(dim_out, style_dim)
+    def forward(self, x):
+        """Return one `style_dim`-sized vector per batch item."""
+        h = self.shared(x)
+        # Flatten the pooled 1x1 feature map to (batch, channels)
+        h = h.view(h.size(0), -1)
+        s = self.unshared(h)
+        return s
+class LinearNorm(torch.nn.Module):
+    """Linear layer whose weight is Xavier-uniform initialised.
+
+    The initialisation gain is taken from `calculate_gain(w_init_gain)`.
+    """
+    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
+        super(LinearNorm, self).__init__()
+        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)
+        torch.nn.init.xavier_uniform_(
+            self.linear_layer.weight,
+            gain=torch.nn.init.calculate_gain(w_init_gain))
+    def forward(self, x):
+        """Apply the linear layer to `x`."""
+        return self.linear_layer(x)
+class ResBlk1d(nn.Module):
+    """1-D residual block with optional instance norm, dropout and downsampling.
+
+    The shortcut path applies an optional 1x1 conv and average pooling; the
+    residual path applies two weight-normalised 3-tap convs with a strided
+    depthwise conv (`self.pool`) in between. The two paths are summed and
+    divided by sqrt(2).
+    """
+    def __init__(self, dim_in, dim_out, actv=nn.LeakyReLU(0.2),
+                 normalize=False, downsample='none', dropout_p=0.2):
+        super().__init__()
+        self.actv = actv
+        self.normalize = normalize
+        self.downsample_type = downsample
+        # A learned shortcut is only needed when the channel count changes
+        self.learned_sc = dim_in != dim_out
+        self._build_weights(dim_in, dim_out)
+        self.dropout_p = dropout_p
+        # Any downsample value other than 'none' enables the strided conv
+        if self.downsample_type == 'none':
+            self.pool = nn.Identity()
+        else:
+            self.pool = weight_norm(nn.Conv1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1))
+    def _build_weights(self, dim_in, dim_out):
+        """Create the convolutions and, if enabled, the instance norms."""
+        self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_in, 3, 1, 1))
+        self.conv2 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
+        if self.normalize:
+            self.norm1 = nn.InstanceNorm1d(dim_in, affine=True)
+            self.norm2 = nn.InstanceNorm1d(dim_in, affine=True)
+        if self.learned_sc:
+            self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
+    def downsample(self, x):
+        """Halve the last axis by average pooling (identity for 'none')."""
+        if self.downsample_type == 'none':
+            return x
+        else:
+            # Pad an odd-length last axis by repeating its final element
+            if x.shape[-1] % 2 != 0:
+                x = torch.cat([x, x[..., -1].unsqueeze(-1)], dim=-1)
+            return F.avg_pool1d(x, 2)
+    def _shortcut(self, x):
+        """Shortcut path: optional 1x1 conv, then pooling-based downsample."""
+        if self.learned_sc:
+            x = self.conv1x1(x)
+        x = self.downsample(x)
+        return x
+    def _residual(self, x):
+        """Residual path: (norm) -> act -> dropout -> conv1 -> pool -> (norm) -> act -> dropout -> conv2."""
+        if self.normalize:
+            x = self.norm1(x)
+        x = self.actv(x)
+        x = F.dropout(x, p=self.dropout_p, training=self.training)
+        x = self.conv1(x)
+        x = self.pool(x)
+        if self.normalize:
+            x = self.norm2(x)
+        x = self.actv(x)
+        x = F.dropout(x, p=self.dropout_p, training=self.training)
+        x = self.conv2(x)
+        return x
+    def forward(self, x):
+        x = self._shortcut(x) + self._residual(x)
+        return x / math.sqrt(2)  # unit variance
+class LayerNorm(nn.Module):
+    def __init__(self, channels, eps=1e-5):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        self.gamma = nn.Parameter(torch.ones(channels))
+        self.beta = nn.Parameter(torch.zeros(channels))
+    def forward(self, x):
+        x = x.transpose(1, -1)
+        x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps)
+        return x.transpose(1, -1)
+class TextEncoder(nn.Module):
+    def __init__(self, channels, kernel_size, depth, n_symbols, actv=nn.LeakyReLU(0.2)):
+        super().__init__()
+        self.embedding = nn.Embedding(n_symbols, channels)
+        padding = (kernel_size - 1) // 2
+        self.cnn = nn.ModuleList()
+        for _ in range(depth):
+            self.cnn.append(nn.Sequential(
+                weight_norm(nn.Conv1d(channels, channels, kernel_size=kernel_size, padding=padding)),
+                LayerNorm(channels),
+                actv,
+                nn.Dropout(0.2),
+            ))
+        # self.cnn = nn.Sequential(*self.cnn)
+        self.lstm = nn.LSTM(channels, channels//2, 1, batch_first=True, bidirectional=True)
+    def forward(self, x, input_lengths, m):
+        x = self.embedding(x)  # [B, T, emb]
+        x = x.transpose(1, 2)  # [B, emb, T]
+        m = m.to(input_lengths.device).unsqueeze(1)
+        x.masked_fill_(m, 0.0)
+        for c in self.cnn:
+            x = c(x)
+            x.masked_fill_(m, 0.0)
+        x = x.transpose(1, 2)  # [B, T, chn]
+        input_lengths = input_lengths.cpu().numpy()
+        x = nn.utils.rnn.pack_padded_sequence(
+            x, input_lengths, batch_first=True, enforce_sorted=False)
+        self.lstm.flatten_parameters()
+        x, _ = self.lstm(x)
+        x, _ = nn.utils.rnn.pad_packed_sequence(
+            x, batch_first=True)
+        x = x.transpose(-1, -2)
+        x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
+        x_pad[:, :, :x.shape[-1]] = x
+        x = x_pad.to(x.device)
+        x.masked_fill_(m, 0.0)
+        return x
+    def inference(self, x):
+        x = self.embedding(x)
+        x = x.transpose(1, 2)
+        x = self.cnn(x)
+        x = x.transpose(1, 2)
+        self.lstm.flatten_parameters()
+        x, _ = self.lstm(x)
+        return x
+    def length_to_mask(self, lengths):
+        mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
+        mask = torch.gt(mask+1, lengths.unsqueeze(1))
+        return mask
+class AdaIN1d(nn.Module):
+    """Adaptive instance normalisation for 1-D feature maps.
+
+    A linear layer maps the style vector `s` to a per-channel scale and
+    shift, which modulate the instance-normalised input.
+    """
+    def __init__(self, style_dim, num_features):
+        super().__init__()
+        self.norm = nn.InstanceNorm1d(num_features, affine=False)
+        # Produces gamma and beta stacked along the feature axis
+        self.fc = nn.Linear(style_dim, num_features*2)
+    def forward(self, x, s):
+        h = self.fc(s)
+        # Add a trailing axis so gamma/beta broadcast over the last axis of x
+        h = h.view(h.size(0), h.size(1), 1)
+        gamma, beta = torch.chunk(h, chunks=2, dim=1)
+        # Style-conditioned affine transform of the normalised input:
+        # (1 + gamma(s)) * instance_norm(x) + beta(s)
+        return (1 + gamma) * self.norm(x) + beta
+class UpSample1d(nn.Module):
+    def __init__(self, layer_type):
+        super().__init__()
+        self.layer_type = layer_type
+    def forward(self, x):
+        if self.layer_type == 'none':
+            return x
+        else:
+            return F.interpolate(x, scale_factor=2, mode='nearest')
+class AdainResBlk1d(nn.Module):
+    """1-D residual block conditioned on a style vector via AdaIN1d.
+
+    The shortcut path upsamples by interpolation and applies an optional
+    1x1 conv; the residual path applies AdaIN, activation, a learned
+    transposed-conv upsample (`self.pool`) and two 3-tap convs. The two
+    paths are summed and divided by sqrt(2).
+    """
+    def __init__(self, dim_in, dim_out, style_dim=64, actv=nn.LeakyReLU(0.2),
+                 upsample='none', dropout_p=0.0):
+        super().__init__()
+        self.actv = actv
+        self.upsample_type = upsample
+        self.upsample = UpSample1d(upsample)
+        # A learned shortcut is only needed when the channel count changes
+        self.learned_sc = dim_in != dim_out
+        self._build_weights(dim_in, dim_out, style_dim)
+        self.dropout = nn.Dropout(dropout_p)
+        # Any upsample value other than 'none' enables the transposed conv
+        if upsample == 'none':
+            self.pool = nn.Identity()
+        else:
+            self.pool = weight_norm(nn.ConvTranspose1d(dim_in, dim_in, kernel_size=3, stride=2, groups=dim_in, padding=1, output_padding=1))
+    def _build_weights(self, dim_in, dim_out, style_dim):
+        """Create the convolutions and the two AdaIN layers."""
+        self.conv1 = weight_norm(nn.Conv1d(dim_in, dim_out, 3, 1, 1))
+        self.conv2 = weight_norm(nn.Conv1d(dim_out, dim_out, 3, 1, 1))
+        self.norm1 = AdaIN1d(style_dim, dim_in)
+        self.norm2 = AdaIN1d(style_dim, dim_out)
+        if self.learned_sc:
+            self.conv1x1 = weight_norm(nn.Conv1d(dim_in, dim_out, 1, 1, 0, bias=False))
+    def _shortcut(self, x):
+        """Shortcut path: interpolation upsample, then optional 1x1 conv."""
+        x = self.upsample(x)
+        if self.learned_sc:
+            x = self.conv1x1(x)
+        return x
+    def _residual(self, x, s):
+        """Residual path: AdaIN -> act -> learned upsample -> conv1 -> AdaIN -> act -> conv2."""
+        x = self.norm1(x, s)
+        x = self.actv(x)
+        x = self.pool(x)
+        x = self.conv1(self.dropout(x))
+        x = self.norm2(x, s)
+        x = self.actv(x)
+        x = self.conv2(self.dropout(x))
+        return x
+    def forward(self, x, s):
+        out = self._residual(x, s)
+        # Scale the sum by 1/sqrt(2)
+        out = (out + self._shortcut(x)) / math.sqrt(2)
+        return out
+class AdaLayerNorm(nn.Module):
+    def __init__(self, style_dim, channels, eps=1e-5):
+        super().__init__()
+        self.channels = channels
+        self.eps = eps
+        self.fc = nn.Linear(style_dim, channels*2)
+    def forward(self, x, s):
+        x = x.transpose(-1, -2)
+        x = x.transpose(1, -1)
+        h = self.fc(s)
+        h = h.view(h.size(0), h.size(1), 1)
+        gamma, beta = torch.chunk(h, chunks=2, dim=1)
+        gamma, beta = gamma.transpose(1, -1), beta.transpose(1, -1)
+        x = F.layer_norm(x, (self.channels,), eps=self.eps)
+        x = (1 + gamma) * x + beta
+        return x.transpose(1, -1).transpose(-1, -2)
+class ProsodyPredictor(nn.Module):
+    def __init__(self, style_dim, d_hid, nlayers, max_dur=50, dropout=0.1):
+        super().__init__()
+        self.text_encoder = DurationEncoder(sty_dim=style_dim,
+                                            d_model=d_hid,
+                                            nlayers=nlayers,
+                                            dropout=dropout)
+        self.lstm = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
+        self.duration_proj = LinearNorm(d_hid, max_dur)
+        self.shared = nn.LSTM(d_hid + style_dim, d_hid // 2, 1, batch_first=True, bidirectional=True)
+        self.F0 = nn.ModuleList()
+        self.F0.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
+        self.F0.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
+        self.F0.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
+        self.N = nn.ModuleList()
+        self.N.append(AdainResBlk1d(d_hid, d_hid, style_dim, dropout_p=dropout))
+        self.N.append(AdainResBlk1d(d_hid, d_hid // 2, style_dim, upsample=True, dropout_p=dropout))
+        self.N.append(AdainResBlk1d(d_hid // 2, d_hid // 2, style_dim, dropout_p=dropout))
+        self.F0_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
+        self.N_proj = nn.Conv1d(d_hid // 2, 1, 1, 1, 0)
+    def F0Ntrain(self, x, s):
+        x, _ = self.shared(x.transpose(-1, -2))
+        F0 = x.transpose(-1, -2)
+        for block in self.F0:
+            F0 = block(F0, s)
+        F0 = self.F0_proj(F0)
+        N = x.transpose(-1, -2)
+        for block in self.N:
+            N = block(N, s)
+        N = self.N_proj(N)
+        return F0.squeeze(1), N.squeeze(1)
+    def length_to_mask(self, lengths):
+        mask = torch.arange(lengths.max()).unsqueeze(0).expand(lengths.shape[0], -1).type_as(lengths)
+        mask = torch.gt(mask+1, lengths.unsqueeze(1))
+        return mask
+class DurationEncoder(nn.Module):
+    def __init__(self, sty_dim, d_model, nlayers, dropout=0.1):
+        super().__init__()
+        self.lstms = nn.ModuleList()
+        for _ in range(nlayers):
+            self.lstms.append(nn.LSTM(d_model + sty_dim,
+                                 d_model // 2,
+                                 num_layers=1,
+                                 batch_first=True,
+                                 bidirectional=True,
+                                 dropout=dropout))
+            self.lstms.append(AdaLayerNorm(sty_dim, d_model))
+        self.dropout = dropout
+        self.d_model = d_model
+        self.sty_dim = sty_dim
+    def forward(self, x, style, text_lengths, m):
+        masks = m.to(text_lengths.device)
+        x = x.permute(2, 0, 1)
+        s = style.expand(x.shape[0], x.shape[1], -1)
+        x = torch.cat([x, s], axis=-1)
+        x.masked_fill_(masks.unsqueeze(-1).transpose(0, 1), 0.0)
+        x = x.transpose(0, 1)
+        input_lengths = text_lengths.cpu().numpy()
+        x = x.transpose(-1, -2)
+        for block in self.lstms:
+            if isinstance(block, AdaLayerNorm):
+                x = block(x.transpose(-1, -2), style).transpose(-1, -2)
+                x = torch.cat([x, s.permute(1, -1, 0)], axis=1)
+                x.masked_fill_(masks.unsqueeze(-1).transpose(-1, -2), 0.0)
+            else:
+                x = x.transpose(-1, -2)
+                x = nn.utils.rnn.pack_padded_sequence(
+                    x, input_lengths, batch_first=True, enforce_sorted=False)
+                block.flatten_parameters()
+                x, _ = block(x)
+                x, _ = nn.utils.rnn.pad_packed_sequence(
+                    x, batch_first=True)
+                x = F.dropout(x, p=self.dropout, training=self.training)
+                x = x.transpose(-1, -2)
+                x_pad = torch.zeros([x.shape[0], x.shape[1], m.shape[-1]])
+                x_pad[:, :, :x.shape[-1]] = x
+                x = x_pad.to(x.device)
+#         print('Calling Duration Encoder\n\n\n\n',x.shape, x.min(), x.max())
+#         Calling Duration Encoder
+#  torch.Size([1, 640, 107]) tensor(-3.0903, device='cuda:0') tensor(2.3089, device='cuda:0')
+        return x.transpose(-1, -2)
+def load_F0_models(path):
+    # load F0 model
+    F0_model = JDCNet(num_class=1, seq_len=192)
+    print(path, 'WHAT ARE YOU TRYING TO LOAD F0 L520')
+    path = path.replace('.t7', '.pth')
+    params = torch.load(path, map_location='cpu')['net']
+    F0_model.load_state_dict(params)
+    _ = F0_model.train()
+    return F0_model
+def load_ASR_models(ASR_MODEL_PATH, ASR_MODEL_CONFIG):
+    # load ASR model
+    def _load_config(path):
+        with open(path) as f:
+            config = yaml.safe_load(f)
+        model_config = config['model_params']
+        return model_config
+    def _load_model(model_config, model_path):
+        model = ASRCNN(**model_config)
+        params = torch.load(model_path, map_location='cpu')['model']
+        model.load_state_dict(params)
+        return model
+    asr_model_config = _load_config(ASR_MODEL_CONFIG)
+    asr_model = _load_model(asr_model_config, ASR_MODEL_PATH)
+    _ = asr_model.train()
+    return asr_model
+def build_model(args, text_aligner, pitch_extractor, bert):
+    print(f'\n==============\n {args.decoder.type=}\n==============L584 models.py @ build_model()\n')
+    from Modules.hifigan import Decoder
+    decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
+            resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
+            upsample_rates = args.decoder.upsample_rates,
+            upsample_initial_channel=args.decoder.upsample_initial_channel,
+            resblock_dilation_sizes=args.decoder.resblock_dilation_sizes,
+            upsample_kernel_sizes=args.decoder.upsample_kernel_sizes)
+    text_encoder = TextEncoder(channels=args.hidden_dim, kernel_size=5, depth=args.n_layer, n_symbols=args.n_token)
+    predictor = ProsodyPredictor(style_dim=args.style_dim, d_hid=args.hidden_dim, nlayers=args.n_layer, max_dur=args.max_dur, dropout=args.dropout)
+    style_encoder = StyleEncoder(dim_in=args.dim_in, style_dim=args.style_dim, max_conv_dim=args.hidden_dim) # acoustic style encoder
+    predictor_encoder = StyleEncoder(dim_in=args.dim_in, style_dim=args.style_dim, max_conv_dim=args.hidden_dim) # prosodic style encoder
+    # define diffusion model
+    if args.multispeaker:
+        transformer = StyleTransformer1d(channels=args.style_dim*2,
+                                    context_embedding_features=bert.config.hidden_size,
+                                    context_features=args.style_dim*2,
+                                    **args.diffusion.transformer)
+    else:
+        transformer = Transformer1d(channels=args.style_dim*2,
+                                    context_embedding_features=bert.config.hidden_size,
+                                    **args.diffusion.transformer)
+    diffusion = AudioDiffusionConditional(
+        in_channels=1,
+        embedding_max_length=bert.config.max_position_embeddings,
+        embedding_features=bert.config.hidden_size,
+        embedding_mask_proba=args.diffusion.embedding_mask_proba, # Conditional dropout of batch elements,
+        channels=args.style_dim*2,
+        context_features=args.style_dim*2,
+    )
+    diffusion.diffusion = KDiffusion(
+        net=diffusion.unet,
+        sigma_distribution=LogNormalDistribution(mean = args.diffusion.dist.mean, std = args.diffusion.dist.std),
+        sigma_data=args.diffusion.dist.sigma_data, # a placeholder, will be changed dynamically when start training diffusion model
+        dynamic_threshold=0.0
+    )
+    diffusion.diffusion.net = transformer
+    diffusion.unet = transformer
+    nets = Munch(
+            bert=bert,
+            bert_encoder=nn.Linear(bert.config.hidden_size, args.hidden_dim),
+            predictor=predictor,
+            decoder=decoder,
+            text_encoder=text_encoder,
+            predictor_encoder=predictor_encoder,
+            style_encoder=style_encoder,
+            diffusion=diffusion,
+            text_aligner = text_aligner,
+            pitch_extractor=pitch_extractor
+       )
+    return nets

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ espeak-ng

pca/annotated_features.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4d9a2bd9d3139441989c822d48fc96f887e0d3ea015d55dcb5df9b004836e4
+size 2576

pca/annotations.json ADDED Viewed

	@@ -0,0 +1,1991 @@

+[
+  {
+    "audio": "\/data\/upload\/1\/a0e6621c-09uKIJP6.wav",
+    "id": 1,
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "gender": [
+      {
+        "rating": 1
+      }
+    ],
+    "tone": [
+      {
+        "rating": 1
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 1,
+    "created_at": "2024-12-24T11:07:30.438634Z",
+    "updated_at": "2024-12-24T11:07:30.438652Z",
+    "lead_time": 78.024
+  },
+  {
+    "audio": "\/data\/upload\/1\/e8e3e271-gKtQS1UY.wav",
+    "id": 2,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 2,
+    "created_at": "2024-12-24T11:07:48.953656Z",
+    "updated_at": "2024-12-24T11:07:48.953687Z",
+    "lead_time": 18.297
+  },
+  {
+    "audio": "\/data\/upload\/1\/77d7e6d8-PTYJ87FC.wav",
+    "id": 3,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 3,
+    "created_at": "2024-12-24T11:08:08.418844Z",
+    "updated_at": "2024-12-24T11:08:08.418858Z",
+    "lead_time": 19.242
+  },
+  {
+    "audio": "\/data\/upload\/1\/31246ad1-IPKUCJ5q.wav",
+    "id": 4,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 5
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 4,
+    "created_at": "2024-12-24T11:09:05.335687Z",
+    "updated_at": "2024-12-24T11:09:05.335705Z",
+    "lead_time": 18.536
+  },
+  {
+    "audio": "\/data\/upload\/1\/98bc1f15-aMNM7RnL.wav",
+    "id": 5,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 5,
+    "created_at": "2024-12-24T11:09:22.334461Z",
+    "updated_at": "2024-12-24T11:09:22.334478Z",
+    "lead_time": 16.818
+  },
+  {
+    "audio": "\/data\/upload\/1\/23109e4f-icIKTMCB.wav",
+    "id": 6,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 6,
+    "created_at": "2024-12-24T11:09:38.553151Z",
+    "updated_at": "2024-12-24T11:09:38.553169Z",
+    "lead_time": 16.039
+  },
+  {
+    "audio": "\/data\/upload\/1\/224025c1-E5F0dino.wav",
+    "id": 7,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 7,
+    "created_at": "2024-12-24T11:09:56.657942Z",
+    "updated_at": "2024-12-24T11:09:56.657961Z",
+    "lead_time": 17.889
+  },
+  {
+    "audio": "\/data\/upload\/1\/cb85576d-1EtXTH9V.wav",
+    "id": 8,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 5
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 8,
+    "created_at": "2024-12-24T11:10:14.123722Z",
+    "updated_at": "2024-12-24T11:10:14.123739Z",
+    "lead_time": 17.245
+  },
+  {
+    "audio": "\/data\/upload\/1\/60474851-a1k94fVP.wav",
+    "id": 9,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 9,
+    "created_at": "2024-12-24T11:10:29.310038Z",
+    "updated_at": "2024-12-24T11:10:29.310055Z",
+    "lead_time": 14.969
+  },
+  {
+    "audio": "\/data\/upload\/1\/38b8e84f-NMbCNxj4.wav",
+    "id": 10,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 10,
+    "created_at": "2024-12-24T11:10:48.858033Z",
+    "updated_at": "2024-12-24T11:10:48.858047Z",
+    "lead_time": 19.352
+  },
+  {
+    "audio": "\/data\/upload\/1\/5701e1ab-smOpJVPt.wav",
+    "id": 11,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 11,
+    "created_at": "2024-12-24T11:11:09.250985Z",
+    "updated_at": "2024-12-24T11:11:09.251003Z",
+    "lead_time": 20.202
+  },
+  {
+    "audio": "\/data\/upload\/1\/d2348238-b0hbiZd4.wav",
+    "id": 12,
+    "gender": [
+      {
+        "rating": 5
+      }
+    ],
+    "tone": [
+      {
+        "rating": 5
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 12,
+    "created_at": "2024-12-24T11:11:24.519944Z",
+    "updated_at": "2024-12-24T11:11:24.519960Z",
+    "lead_time": 15.064
+  },
+  {
+    "audio": "\/data\/upload\/1\/e24973a9-w6xb7KtX.wav",
+    "id": 13,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 13,
+    "created_at": "2024-12-24T11:11:43.845338Z",
+    "updated_at": "2024-12-24T11:11:43.845355Z",
+    "lead_time": 19.115
+  },
+  {
+    "audio": "\/data\/upload\/1\/1c7b216d-9uBvtpvw.wav",
+    "id": 14,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 14,
+    "created_at": "2024-12-24T11:12:05.707588Z",
+    "updated_at": "2024-12-24T11:12:05.707607Z",
+    "lead_time": 21.646
+  },
+  {
+    "audio": "\/data\/upload\/1\/f4a0294e-U3XBhPBR.wav",
+    "id": 15,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 15,
+    "created_at": "2024-12-24T11:12:24.220074Z",
+    "updated_at": "2024-12-24T11:12:24.220103Z",
+    "lead_time": 18.288
+  },
+  {
+    "audio": "\/data\/upload\/1\/bf754fd0-c5qSnKvS.wav",
+    "id": 16,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 16,
+    "created_at": "2024-12-24T11:12:44.912272Z",
+    "updated_at": "2024-12-24T11:12:44.912290Z",
+    "lead_time": 20.478
+  },
+  {
+    "audio": "\/data\/upload\/1\/4f38ebb9-gQGR5r2f.wav",
+    "id": 17,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 17,
+    "created_at": "2024-12-24T11:12:59.915745Z",
+    "updated_at": "2024-12-24T11:12:59.915768Z",
+    "lead_time": 14.792
+  },
+  {
+    "audio": "\/data\/upload\/1\/d11c1e40-bjFLpT38.wav",
+    "id": 18,
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 5
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 18,
+    "created_at": "2024-12-24T11:13:18.604486Z",
+    "updated_at": "2024-12-24T11:13:18.604513Z",
+    "lead_time": 18.46
+  },
+  {
+    "audio": "\/data\/upload\/1\/8bb81706-FAjL28lO.wav",
+    "id": 19,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 19,
+    "created_at": "2024-12-24T11:13:38.605869Z",
+    "updated_at": "2024-12-24T11:13:38.605883Z",
+    "lead_time": 19.81
+  },
+  {
+    "audio": "\/data\/upload\/1\/aecc6e1e-oWkO6Hjw.wav",
+    "id": 20,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 20,
+    "created_at": "2024-12-24T11:13:56.837635Z",
+    "updated_at": "2024-12-24T11:13:56.837651Z",
+    "lead_time": 18.017
+  },
+  {
+    "audio": "\/data\/upload\/1\/ebe57847-6784JzTG.wav",
+    "id": 21,
+    "gender": [
+      {
+        "rating": 5
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 21,
+    "created_at": "2024-12-24T11:14:10.304635Z",
+    "updated_at": "2024-12-24T11:14:10.304650Z",
+    "lead_time": 13.261
+  },
+  {
+    "audio": "\/data\/upload\/1\/3e2fb517-eKehysg6.wav",
+    "id": 22,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 22,
+    "created_at": "2024-12-24T11:14:30.874016Z",
+    "updated_at": "2024-12-24T11:14:30.874050Z",
+    "lead_time": 20.356
+  },
+  {
+    "audio": "\/data\/upload\/1\/e54d1cd4-VyDYticQ.wav",
+    "id": 23,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 5
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 23,
+    "created_at": "2024-12-24T11:14:50.274649Z",
+    "updated_at": "2024-12-24T11:14:50.274679Z",
+    "lead_time": 19.207
+  },
+  {
+    "audio": "\/data\/upload\/1\/06ae990f-u3g9CMFd.wav",
+    "id": 24,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 24,
+    "created_at": "2024-12-24T11:15:09.533558Z",
+    "updated_at": "2024-12-24T11:15:09.533575Z",
+    "lead_time": 19.046
+  },
+  {
+    "audio": "\/data\/upload\/1\/2b0a0481-60HQeXwq.wav",
+    "id": 25,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 25,
+    "created_at": "2024-12-24T11:15:36.688760Z",
+    "updated_at": "2024-12-24T11:15:36.688777Z",
+    "lead_time": 26.939
+  },
+  {
+    "audio": "\/data\/upload\/1\/6b789c7b-lXr0TsEs.wav",
+    "id": 26,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 26,
+    "created_at": "2024-12-24T11:16:02.130089Z",
+    "updated_at": "2024-12-24T11:16:02.130104Z",
+    "lead_time": 25.222
+  },
+  {
+    "audio": "\/data\/upload\/1\/54701cf9-Xmsf8FAI.wav",
+    "id": 27,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 27,
+    "created_at": "2024-12-24T11:16:24.322800Z",
+    "updated_at": "2024-12-24T11:16:24.322816Z",
+    "lead_time": 21.974
+  },
+  {
+    "audio": "\/data\/upload\/1\/d4c11f09-k74bHcAg.wav",
+    "id": 28,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 28,
+    "created_at": "2024-12-24T11:16:45.824302Z",
+    "updated_at": "2024-12-24T11:16:45.824318Z",
+    "lead_time": 21.292
+  },
+  {
+    "audio": "\/data\/upload\/1\/47f26601-mvoZlxHp.wav",
+    "id": 29,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 29,
+    "created_at": "2024-12-24T11:17:03.084102Z",
+    "updated_at": "2024-12-24T11:17:03.084117Z",
+    "lead_time": 17.05
+  },
+  {
+    "audio": "\/data\/upload\/1\/bcfd3f48-PRqxs9Rw.wav",
+    "id": 30,
+    "gender": [
+      {
+        "rating": 1
+      }
+    ],
+    "tone": [
+      {
+        "rating": 1
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 30,
+    "created_at": "2024-12-24T11:17:37.512393Z",
+    "updated_at": "2024-12-24T11:17:37.512413Z",
+    "lead_time": 18.603
+  },
+  {
+    "audio": "\/data\/upload\/1\/8a85e9db-BMW1WUKl.wav",
+    "id": 31,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 31,
+    "created_at": "2024-12-24T11:17:57.564014Z",
+    "updated_at": "2024-12-24T11:17:57.564028Z",
+    "lead_time": 19.839
+  },
+  {
+    "audio": "\/data\/upload\/1\/2fe4e716-GWHpVwcn.wav",
+    "id": 32,
+    "gender": [
+      {
+        "rating": 1
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 32,
+    "created_at": "2024-12-24T11:18:16.433573Z",
+    "updated_at": "2024-12-24T11:18:16.433589Z",
+    "lead_time": 18.673
+  },
+  {
+    "audio": "\/data\/upload\/1\/a398fdc8-THjkbhMN.wav",
+    "id": 33,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 33,
+    "created_at": "2024-12-24T11:18:33.011557Z",
+    "updated_at": "2024-12-24T11:18:33.011573Z",
+    "lead_time": 16.387
+  },
+  {
+    "audio": "\/data\/upload\/1\/9479d680-MY4NGnyn.wav",
+    "id": 34,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 34,
+    "created_at": "2024-12-24T11:18:53.212410Z",
+    "updated_at": "2024-12-24T11:18:53.212424Z",
+    "lead_time": 20.005
+  },
+  {
+    "audio": "\/data\/upload\/1\/099ef5d9-1EXWzyIB.wav",
+    "id": 35,
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 35,
+    "created_at": "2024-12-24T11:19:15.157540Z",
+    "updated_at": "2024-12-24T11:19:15.157555Z",
+    "lead_time": 21.708
+  },
+  {
+    "audio": "\/data\/upload\/1\/7757c410-iJzdxYU8.wav",
+    "id": 36,
+    "gender": [
+      {
+        "rating": 1
+      }
+    ],
+    "tone": [
+      {
+        "rating": 1
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 2
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 36,
+    "created_at": "2024-12-24T11:19:33.880308Z",
+    "updated_at": "2024-12-24T11:19:33.880323Z",
+    "lead_time": 18.515
+  },
+  {
+    "audio": "\/data\/upload\/1\/160cf4d1-ClmchUus.wav",
+    "id": 37,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 37,
+    "created_at": "2024-12-24T11:19:49.775185Z",
+    "updated_at": "2024-12-24T11:19:49.775199Z",
+    "lead_time": 15.7
+  },
+  {
+    "audio": "\/data\/upload\/1\/937219e2-YKQzFNZm.wav",
+    "id": 38,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 38,
+    "created_at": "2024-12-24T11:20:28.907802Z",
+    "updated_at": "2024-12-24T11:20:28.907820Z",
+    "lead_time": 17.855
+  },
+  {
+    "audio": "\/data\/upload\/1\/85b05ec9-yYkxPNG5.wav",
+    "id": 39,
+    "quality": [
+      {
+        "rating": 1
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 39,
+    "created_at": "2024-12-24T11:20:49.809333Z",
+    "updated_at": "2024-12-24T11:20:49.809349Z",
+    "lead_time": 20.679
+  },
+  {
+    "audio": "\/data\/upload\/1\/3b2dfcc0-cfyLTOg1.wav",
+    "id": 40,
+    "gender": [
+      {
+        "rating": 1
+      }
+    ],
+    "tone": [
+      {
+        "rating": 1
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 40,
+    "created_at": "2024-12-24T11:21:06.264451Z",
+    "updated_at": "2024-12-24T11:21:06.264483Z",
+    "lead_time": 16.248
+  },
+  {
+    "audio": "\/data\/upload\/1\/2411d637-ACYkJHOa.wav",
+    "id": 41,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 41,
+    "created_at": "2024-12-24T11:21:24.839843Z",
+    "updated_at": "2024-12-24T11:21:24.839861Z",
+    "lead_time": 18.37
+  },
+  {
+    "audio": "\/data\/upload\/1\/079a1904-IvSZLYao.wav",
+    "id": 42,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 5
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 4
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 42,
+    "created_at": "2024-12-24T11:21:41.007058Z",
+    "updated_at": "2024-12-24T11:21:41.007073Z",
+    "lead_time": 15.937
+  },
+  {
+    "audio": "\/data\/upload\/1\/1a523970-Kaze7Zvg.wav",
+    "id": 43,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 43,
+    "created_at": "2024-12-24T11:22:04.349613Z",
+    "updated_at": "2024-12-24T11:22:04.349628Z",
+    "lead_time": 23.163
+  },
+  {
+    "audio": "\/data\/upload\/1\/d157ccb8-xHX2MO3X.wav",
+    "id": 44,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 5
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 5
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 44,
+    "created_at": "2024-12-24T11:22:22.055807Z",
+    "updated_at": "2024-12-24T11:22:22.055826Z",
+    "lead_time": 17.508
+  },
+  {
+    "audio": "\/data\/upload\/1\/c37c9115-dlEOGJ9f.wav",
+    "id": 45,
+    "gender": [
+      {
+        "rating": 5
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 45,
+    "created_at": "2024-12-24T11:22:39.871511Z",
+    "updated_at": "2024-12-24T11:22:39.871527Z",
+    "lead_time": 17.6
+  },
+  {
+    "audio": "\/data\/upload\/1\/68bceda3-LL2BqHEb.wav",
+    "id": 46,
+    "tone": [
+      {
+        "rating": 4
+      }
+    ],
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 3
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 46,
+    "created_at": "2024-12-24T11:23:08.481278Z",
+    "updated_at": "2024-12-24T11:23:08.481294Z",
+    "lead_time": 28.379
+  },
+  {
+    "audio": "\/data\/upload\/1\/b1a204c2-nU9SXVYj.wav",
+    "id": 47,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 47,
+    "created_at": "2024-12-24T11:23:29.486018Z",
+    "updated_at": "2024-12-24T11:23:29.486034Z",
+    "lead_time": 20.793
+  },
+  {
+    "audio": "\/data\/upload\/1\/0cd94ebd-7ZZ5zeJH.wav",
+    "id": 48,
+    "gender": [
+      {
+        "rating": 2
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 5
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 48,
+    "created_at": "2024-12-24T11:23:46.816956Z",
+    "updated_at": "2024-12-24T11:23:46.816974Z",
+    "lead_time": 17.117
+  },
+  {
+    "audio": "\/data\/upload\/1\/0e73f1d1-vxsp9Z1b.wav",
+    "id": 49,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 4
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 49,
+    "created_at": "2024-12-24T11:24:04.381173Z",
+    "updated_at": "2024-12-24T11:24:04.381190Z",
+    "lead_time": 17.363
+  },
+  {
+    "audio": "\/data\/upload\/1\/8dda7cf2-7IskTrY6.wav",
+    "id": 50,
+    "gender": [
+      {
+        "rating": 3
+      }
+    ],
+    "tone": [
+      {
+        "rating": 2
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 2
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 4
+      }
+    ],
+    "quality": [
+      {
+        "rating": 2
+      }
+    ],
+    "style": [
+      {
+        "rating": 5
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 50,
+    "created_at": "2024-12-24T11:24:23.266193Z",
+    "updated_at": "2024-12-24T11:24:23.266207Z",
+    "lead_time": 18.68
+  },
+  {
+    "audio": "\/data\/upload\/1\/d7107f33-TDGKyS0u.wav",
+    "id": 51,
+    "gender": [
+      {
+        "rating": 4
+      }
+    ],
+    "tone": [
+      {
+        "rating": 3
+      }
+    ],
+    "pacing": [
+      {
+        "rating": 3
+      }
+    ],
+    "enunciation": [
+      {
+        "rating": 3
+      }
+    ],
+    "quality": [
+      {
+        "rating": 3
+      }
+    ],
+    "style": [
+      {
+        "rating": 4
+      }
+    ],
+    "annotator": 1,
+    "annotation_id": 51,
+    "created_at": "2024-12-24T11:24:41.890559Z",
+    "updated_at": "2024-12-24T11:24:41.890574Z",
+    "lead_time": 18.436
+  }
+]

pca/generate_pca.py ADDED Viewed

	@@ -0,0 +1,147 @@

+import numpy as np
+import json
+from sklearn.decomposition import PCA
+import joblib
+# File paths
+VOICES_JSON_PATH = "voices.json"
+ANNOTATIONS_JSON_PATH = "annotations.json"
+PCA_MODEL_PATH = "pca_model.pkl"
+VECTOR_DIMENSION = 256  # Adjust based on your actual vector size
+N_COMPONENTS = 6  # Number of PCA components for annotated features
+def load_json(file_path):
+    """Load and parse a UTF-8 encoded JSON file.
+    Args:
+        file_path (str): Path of the JSON file to read.
+    Returns:
+        The decoded JSON document, or an empty dict if the file does not
+        exist or cannot be decoded as JSON (an error is printed in both cases).
+    """
+    try:
+        # JSON is UTF-8 by specification; without an explicit encoding open()
+        # falls back to the platform locale and may mis-decode the file.
+        with open(file_path, "r", encoding="utf-8") as f:
+            return json.load(f)
+    except FileNotFoundError:
+        print(f"Error: {file_path} not found.")
+        return {}
+    except (json.JSONDecodeError, UnicodeDecodeError):
+        # Bytes that are not valid UTF-8 are reported like any other malformed JSON
+        print(f"Error: {file_path} is not valid JSON.")
+        return {}
+def extract_annotated_vectors():
+    """
+    Load annotations and match annotated features with style vectors.
+    Each annotation's audio file name is reduced to a lookup key (the text
+    after the last "-" with ".wav" removed) that is searched in the voices
+    file. Annotations without a matching 1-D style vector, or without a
+    complete set of ratings, are skipped with a printed message.
+    Returns:
+        np.ndarray: Style vectors, one row per kept annotation
+            (presumably VECTOR_DIMENSION wide; the width is not checked here).
+        np.ndarray: Ratings, one row per kept annotation, with columns ordered
+            gender, tone, pacing, enunciation, quality, style.
+        (None, None) is returned instead when no annotation could be used.
+    """
+    # Rating fields, in the column order of the returned feature matrix
+    rating_fields = ("gender", "tone", "pacing", "enunciation", "quality", "style")
+    # Load data
+    voices_data = load_json(VOICES_JSON_PATH)
+    annotations = load_json(ANNOTATIONS_JSON_PATH)
+    style_vectors = []
+    annotated_features = []
+    # Extract annotated features and match style vectors
+    for item in annotations:
+        # Extract the key for the style vector
+        audio_path = item.get("audio", "")
+        key = audio_path.split("/")[-1].split("-")[-1].replace(".wav", "")
+        # Skip if the style vector is missing
+        if key not in voices_data:
+            print(f"Warning: No style vector found for key '{key}'. Skipping.")
+            continue
+        # Get the style vector and ensure it's flattened to 1D
+        style_vector = np.array(voices_data[key], dtype=np.float32).squeeze()
+        if style_vector.ndim != 1:
+            print(f"Skipping vector with unexpected dimensions: {style_vector.shape}")
+            continue
+        # Extract the six ratings; an annotation that lacks one of them is
+        # skipped like any other unusable entry instead of aborting the run
+        try:
+            features = [item[field][0]["rating"] for field in rating_fields]
+        except (KeyError, IndexError, TypeError):
+            print(f"Warning: Incomplete ratings for key '{key}'. Skipping.")
+            continue
+        # Append data
+        style_vectors.append(style_vector)
+        annotated_features.append(features)
+    if not style_vectors or not annotated_features:
+        print("Error: No valid style vectors or annotations found.")
+        return None, None
+    return np.array(style_vectors), np.array(annotated_features)
+def train_and_save_pca_model():
+    """
+    Train the PCA model using annotated style vectors and save the model.
+    Fits a PCA with N_COMPONENTS components on the style vectors, writes it to
+    PCA_MODEL_PATH with joblib and stores the matching ratings in
+    'annotated_features.npy'. Prints an error and returns without writing
+    anything when no data could be extracted or there is too little data
+    for the requested number of components.
+    """
+    # Extract style vectors and annotated features
+    style_vectors, annotated_features = extract_annotated_vectors()
+    if style_vectors is None or annotated_features is None:
+        print("Error: Unable to extract annotated data.")
+        return
+    # Validate shape of style_vectors
+    print(f"Style vectors shape: {style_vectors.shape}")  # Should be (n_samples, 256)
+    print(
+        f"Annotated features shape: {annotated_features.shape}"
+    )  # Should be (n_samples, 6)
+    # PCA cannot extract more components than min(n_samples, n_features);
+    # report that here instead of letting fit() raise
+    if N_COMPONENTS > min(style_vectors.shape):
+        print(
+            f"Error: Need at least {N_COMPONENTS} samples and features to fit "
+            f"{N_COMPONENTS} PCA components; got shape {style_vectors.shape}."
+        )
+        return
+    # Train PCA on style vectors
+    print(f"Training PCA on {len(style_vectors)} style vectors...")
+    pca = PCA(n_components=N_COMPONENTS)
+    pca.fit(style_vectors)
+    # Save PCA model
+    joblib.dump(pca, PCA_MODEL_PATH)
+    print(f"PCA model saved to {PCA_MODEL_PATH}.")
+    # Optionally save annotated features for downstream tasks
+    np.save("annotated_features.npy", annotated_features)
+    print("Annotated features saved to 'annotated_features.npy'.")
+def load_pca_model():
+    """
+    Read the PCA model stored at PCA_MODEL_PATH.
+    Returns:
+        The object deserialized by joblib, or None (after printing an error)
+        when the file does not exist.
+    """
+    model = None
+    try:
+        model = joblib.load(PCA_MODEL_PATH)
+    except FileNotFoundError:
+        print(f"Error: {PCA_MODEL_PATH} not found.")
+    return model
+def reduce_to_pca_components(style_vector, pca):
+    """
+    Project a single style vector into PCA space.
+    Args:
+        style_vector (np.ndarray): Original style vector.
+        pca (PCA): Trained PCA model.
+    Returns:
+        np.ndarray: First (and only) row of the transformed one-vector batch.
+    """
+    # The model is called with a one-element batch, so wrap and unwrap the vector
+    batch = [style_vector]
+    projected = pca.transform(batch)
+    return projected[0]
+def reconstruct_from_pca_components(pca_vector, pca):
+    """
+    Map a single PCA-space vector back to the style-vector space.
+    Args:
+        pca_vector (np.ndarray): Vector in PCA space.
+        pca (PCA): Trained PCA model.
+    Returns:
+        np.ndarray: First (and only) row of the inverse-transformed batch.
+    """
+    # The model is called with a one-element batch, so wrap and unwrap the vector
+    batch = [pca_vector]
+    restored = pca.inverse_transform(batch)
+    return restored[0]
+# Script entry point: running this file directly trains and saves the PCA model.
+if __name__ == "__main__":
+    train_and_save_pca_model()

pca/pca_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e81889d232f9a2c09355466b8069e29f3368f4ccc2e7d640be6223c1eea1d8c
+size 8175

pca/voices.json ADDED Viewed

The diff for this file is too large to render. See raw diff

pca_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e81889d232f9a2c09355466b8069e29f3368f4ccc2e7d640be6223c1eea1d8c
+size 8175

pyproject.toml ADDED Viewed

	@@ -0,0 +1,32 @@

+[project]
+name = "artificial-styletts2"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "audiofile>=1.5.0",
+    "cached-path>=1.6.6",
+    "einops>=0.8.0",
+    "einops-exts>=0.0.4",
+    "gradio>=5.9.1",
+    "huggingface-hub>=0.26.5",
+    "librosa>=0.10.2.post1",
+    "markdown>=3.7",
+    "matplotlib>=3.10.0",
+    "monotonic-align",
+    "munch>=4.0.0",
+    "nltk>=3.9.1",
+    "numpy==2.0",
+    "phonemizer>=3.3.0",
+    "scikit-learn>=1.6.0",
+    "soundfile>=0.12.1",
+    "torch>=2.5.1",
+    "torchaudio>=2.5.1",
+    "tqdm>=4.67.1",
+    "transformers>=4.47.1",
+    "txtsplit>=1.0.0",
+]
+[tool.uv.sources]
+monotonic-align = { git = "https://github.com/resemble-ai/monotonic_align.git" }

requirements.txt ADDED Viewed

	@@ -0,0 +1,21 @@

+git+https://github.com/resemble-ai/monotonic_align.git
+gradio
+audiofile==1.5.0
+cached_path
+einops==0.8.0
+einops_exts==0.0.4
+huggingface_hub
+librosa
+Markdown==3.7
+matplotlib==3.10.0
+munch==4.0.0
+nltk==3.9.1
+numpy
+phonemizer==3.3.0
+scikit-learn
+soundfile==0.12.1
+torch
+torchaudio==2.5.1
+tqdm==4.67.1
+transformers==4.47.1
+txtsplit==1.0.0

text2speech.py ADDED Viewed

	@@ -0,0 +1,598 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import json
+import os
+import argparse
+import random
+import string
+import numpy as np
+import soundfile as sf  # Alias for clarity
+import torch
+import inference
+from txtsplit import txtsplit  # Import txtsplit
+from typing import Optional, Tuple, List
+VOICES_JSON_PATH = "voices.json"  # Contains your known style vectors
+RANDOM_VOICES_JSON_PATH = "random_voices.json"  # We'll store newly sampled vectors here
+##############################################################################
+# JSON LOAD/SAVE
+##############################################################################
+def load_json(path: str) -> dict:
+    """
+    Load existing style vectors from the given JSON file.
+    Additionally, validates that all style vectors have the same length.
+    Args:
+        path (str): Path to the JSON file.
+    Returns:
+        dict: Loaded JSON data, or an empty dict if the file does not exist.
+    Raises:
+        ValueError: If the entries do not all have the same length, or if the
+            file is not valid JSON (json.JSONDecodeError is a ValueError).
+    """
+    if not os.path.exists(path):
+        print(f"No existing '{path}' found. Starting with an empty dictionary.")
+        return {}
+    # Decode as UTF-8 explicitly; the default depends on the platform locale
+    with open(path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    # Verify all vectors have the same length
+    lengths = {len(vec) for vec in data.values()}
+    if len(lengths) > 1:
+        raise ValueError(
+            f"Inconsistent vector lengths found in '{path}': {lengths}. "
+            "All style vectors must have the same dimensionality."
+        )
+    print(f"Loaded {len(data)} style vectors from '{path}'.")
+    return data
+def save_json(data: dict, path: str) -> None:
+    """
+    Save a dict of style vectors to the given JSON file.
+    The data is serialized before the file is opened, so a serialization
+    error leaves an already existing file untouched.
+    Args:
+        data (dict): Data to save.
+        path (str): Path to the JSON file.
+    Raises:
+        TypeError: If 'data' contains values that json cannot serialize.
+    """
+    # Serialize first: opening with "w" truncates the file immediately, and a
+    # failure halfway through the dump would destroy the previous contents
+    serialized = json.dumps(data, indent=2)
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(serialized)
+    print(f"Saved {len(data)} style vectors to '{path}'.")
+##############################################################################
+# GAUSSIAN FIT AND SAMPLING
+##############################################################################
+def fit_gaussian_to_voices(voices_data: dict) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Estimate the mean and covariance of the style vectors in 'voices_data'.
+    'voices_data' is a dict: { "key.wav": <list-of-floats>, ... }. Entries
+    that are not 1D after squeezing out size-1 axes are reported and ignored.
+    Args:
+        voices_data (dict): Dictionary containing style vectors.
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: Mean (D,) and covariance (D, D).
+    Raises:
+        ValueError: If fewer than two usable vectors remain.
+    """
+    usable = []
+    for name, raw in voices_data.items():
+        # Convert to float32 and drop any axis of size 1, e.g. (1, D) -> (D,)
+        vec = np.squeeze(np.array(raw, dtype=np.float32))
+        if vec.ndim != 1:
+            print(
+                f"Skipping '{name}' because shape is {vec.shape}, not 1D after squeeze."
+            )
+            continue
+        usable.append(vec)
+    # A covariance estimate needs at least two samples
+    if len(usable) < 2:
+        raise ValueError(
+            "Need at least 2 valid style vectors to fit a Gaussian distribution.\n"
+            "Check that each entry is 1D (or (1,D) which can be squeezed)."
+        )
+    # One row per vector: shape (N, D)
+    stacked = np.stack(usable, axis=0)
+    if stacked.ndim != 2:
+        raise ValueError("Style vectors must collectively form a 2D array (N, D).")
+    mean = stacked.mean(axis=0)
+    # rowvar=False: columns are the variables, rows the observations
+    cov = np.cov(stacked, rowvar=False)
+    print("Fitted Gaussian distribution to style vectors.")
+    return mean, cov
+def sample_random_style(mean: np.ndarray, cov: np.ndarray) -> torch.Tensor:
+    """
+    Draw one style vector from the Gaussian described by 'mean' and 'cov'.
+    Args:
+        mean (np.ndarray): Mean vector of the Gaussian.
+        cov (np.ndarray): Covariance matrix of the Gaussian.
+    Returns:
+        torch.Tensor: The sample as a float32 tensor of shape (1, D).
+    """
+    draw = np.random.multivariate_normal(mean, cov)
+    # float32 tensor with a leading batch axis: (D,) -> (1, D)
+    style_tensor = torch.tensor(draw, dtype=torch.float32).unsqueeze(0)
+    print(f"Sampled a new random style vector with shape {style_tensor.shape}.")
+    return style_tensor
+##############################################################################
+# UTILITIES
+##############################################################################
+def parse_speed(value) -> float:
+    """
+    Convert 'value' into a float between 0.5 and 2.0 based on custom logic.
+    Rules:
+        - A string ending in '%' is a percentage: "120%" -> 1.2.
+        - Any other number (string or not) that is >= 10 is also read as a
+          percentage (130 -> 1.3); smaller numbers are used as they are.
+        - Input that cannot be read as a number (unparsable strings, None,
+          lists, NaN, ...) falls back to the default speed 1.0.
+        - The result is clamped to [0.5, 2.0].
+    Examples:
+        parse_speed("120%") -> 1.2
+        parse_speed(0.3)    -> 0.5 (clamped)
+        parse_speed(5)      -> 2.0 (clamped)
+        parse_speed("100%") -> 1.0
+        parse_speed(1)      -> 1.0
+        parse_speed(3)      -> 2.0 (clamped)
+        parse_speed(50)     -> 0.5
+        parse_speed(100)    -> 1.0
+        parse_speed(130)    -> 1.3
+        parse_speed("150")  -> 1.5
+        parse_speed(None)   -> 1.0 (fallback)
+    """
+    import math  # local import: only this function needs it
+    is_text = isinstance(value, str)
+    is_percent = False
+    numeric = value
+    if is_text:
+        value = value.strip()
+        is_percent = value.endswith("%")
+        # Drop the '%' suffix before converting
+        numeric = value[:-1].strip() if is_percent else value
+    try:
+        f = float(numeric)
+    except (TypeError, ValueError, OverflowError):
+        # TypeError: None, lists, ...; OverflowError: ints too large for a float
+        f = math.nan
+    if math.isnan(f):
+        # NaN is treated as invalid; it would otherwise pass the clamp as 2.0
+        kind = "format" if is_text else "value"
+        print(f"Invalid speed {kind} '{value}'. Falling back to default speed 1.0.")
+        speed = 1.0
+    elif is_percent or f >= 10:
+        # Explicit percentages and large plain numbers: 50 -> 0.5, 150 -> 1.5
+        speed = f / 100.0
+    else:
+        speed = f
+    # Clamp to [0.5, 2.0]
+    clamped_speed = max(0.5, min(2.0, speed))
+    if clamped_speed != speed:
+        print(f"Speed {speed} clamped to {clamped_speed}.")
+    else:
+        print(f"Parsed speed: {clamped_speed}")
+    return clamped_speed
+def concatenate_audios(audios: List[np.ndarray]) -> np.ndarray:
+    """
+    Join several audio waveforms end to end along the first axis.
+    Args:
+        audios (List[np.ndarray]): Waveforms in playback order.
+    Returns:
+        np.ndarray: One waveform containing all inputs back to back.
+    """
+    # np.concatenate joins along axis 0 by default
+    joined = np.concatenate(audios)
+    return joined
+##############################################################################
+# SYNTHESIS CORE FUNCTION
+##############################################################################
+def synthesize_audio(
+    text_chunks: List[str],
+    style_vec: torch.Tensor,
+    speed: float,
+    alpha: float = 0.3,
+    beta: float = 0.7,
+    diffusion_steps: int = 7,
+    embedding_scale: float = 1.0,
+) -> Optional[np.ndarray]:
+    """
+    Run inference on every text chunk with one style vector and join the results.
+    Chunks for which inference returns None are reported and left out.
+    Args:
+        text_chunks (List[str]): List of text segments to synthesize.
+        style_vec (torch.Tensor): Style vector passed to every inference call.
+        speed (float): Parsed speed factor.
+        alpha (float): Alpha parameter for inference.
+        beta (float): Beta parameter for inference.
+        diffusion_steps (int): Number of diffusion steps for inference.
+        embedding_scale (float): Embedding scale parameter.
+    Returns:
+        Optional[np.ndarray]: Concatenated audio waveform, or None if no chunk
+        produced audio.
+    """
+    total = len(text_chunks)
+    segments = []
+    for position, piece in enumerate(text_chunks, start=1):
+        print(f"Synthesizing chunk {position}/{total}...")
+        waveform = inference.inference(
+            piece,
+            style_vec,
+            alpha=alpha,
+            beta=beta,
+            diffusion_steps=diffusion_steps,
+            embedding_scale=embedding_scale,
+            speed=speed,
+        )
+        if waveform is None:
+            # Report the failed chunk (first 30 characters) and carry on
+            print(f"Inference returned None for text segment {position}: {piece[:30]}...")
+            continue
+        segments.append(waveform)
+        print(f"Chunk {position} synthesized successfully.")
+    if len(segments) == 0:
+        print("No audio segments were generated.")
+        return None
+    # Join the per-chunk waveforms in order
+    print("Concatenating audio segments...")
+    full_audio = concatenate_audios(segments)
+    print(f"Concatenated audio length: {len(full_audio)} samples.")
+    return full_audio
+##############################################################################
+# TTS USING A RANDOMLY SAMPLED STYLE
+##############################################################################
+def tts_randomized(
+    text: str, speed: float = 1.2
+) -> Tuple[Optional[np.ndarray], Optional[torch.Tensor]]:
+    """
+    1) Loads style vectors from voices.json
+    2) Fits a Gaussian to those vectors
+    3) Samples a new style vector from that distribution
+    4) Saves it in random_voices.json under a key that is not yet in use
+    5) Synthesizes TTS using that random style, handling long texts.
+    Args:
+        text (str): The text to be synthesized.
+        speed (float): Speed of the generated audio (anything parse_speed accepts).
+    Returns:
+        Tuple[Optional[np.ndarray], Optional[torch.Tensor]]: (audio_waveform, style_vector).
+        (None, None) when voices.json is missing, empty, invalid, or holds too
+        few usable vectors; the audio alone is None when synthesis yields nothing.
+    Raises:
+        ValueError: If random_voices.json exists but cannot be loaded; it is
+            deliberately not overwritten in that case.
+    """
+    # Load known style vectors from voices.json; a broken file is reported
+    # the same way tts_normal reports it instead of raising
+    try:
+        voices_data = load_json(VOICES_JSON_PATH)
+    except ValueError as e:
+        print(f"Error loading/validating '{VOICES_JSON_PATH}': {e}")
+        return None, None
+    if not voices_data:
+        print(f"No data found in '{VOICES_JSON_PATH}'; cannot sample a random style.")
+        return None, None
+    # Fit Gaussian
+    try:
+        mean, cov = fit_gaussian_to_voices(voices_data)
+    except ValueError as e:
+        print(f"Error fitting Gaussian: {e}")
+        return None, None
+    # Sample new vector
+    random_style_tensor = sample_random_style(mean, cov)
+    # Load the previously sampled voices so the new key can be checked against them
+    random_voices_data = load_json(RANDOM_VOICES_JSON_PATH)
+    # Draw 6-digit keys until one is unused, so no stored voice is overwritten
+    while True:
+        random_key = "random_" + "".join(random.choices(string.digits, k=6))
+        if random_key not in random_voices_data:
+            break
+    print(f"Generated random style key: '{random_key}'")
+    # Save in random_voices.json
+    random_voices_data[random_key] = random_style_tensor.squeeze(0).tolist()
+    save_json(random_voices_data, RANDOM_VOICES_JSON_PATH)
+    print(
+        f"Saved random style vector to '{RANDOM_VOICES_JSON_PATH}' under key '{random_key}'."
+    )
+    # Parse speed
+    speed = parse_speed(speed)
+    # Split text into manageable chunks using txtsplit
+    print("Splitting text into chunks...")
+    text_chunks = txtsplit(text)
+    print(f"Text split into {len(text_chunks)} chunks.")
+    # Synthesize audio using the core function
+    full_audio = synthesize_audio(
+        text_chunks=text_chunks, style_vec=random_style_tensor, speed=speed
+    )
+    return full_audio, random_style_tensor
+##############################################################################
+# NORMAL (NON-RANDOM) TTS LOGIC
+##############################################################################
+def get_or_compute_style_vector(key_or_path: str, voices_data: dict) -> torch.Tensor:
+    """
+    Resolve a voice key or an audio file path to a style vector.
+    A key present in 'voices_data' is returned from there. Otherwise, if the
+    argument names an existing file, its style is computed with
+    inference.compute_style, stored in 'voices_data' under the path and
+    written back to the voices file.
+    Args:
+        key_or_path (str): Voice key or file path.
+        voices_data (dict): Dictionary of existing style vectors (updated in
+            place when a new vector is computed).
+    Returns:
+        torch.Tensor: Style vector as a 2D tensor.
+    Raises:
+        ValueError: If the argument is neither a known key nor a file, if the
+            style cannot be computed, or if the vector is not 1D, 2D or 3D.
+    """
+    if key_or_path in voices_data:
+        print(f"Found style vector for '{key_or_path}' in '{VOICES_JSON_PATH}'.")
+        style_vec = torch.tensor(voices_data[key_or_path], dtype=torch.float32)
+    elif not os.path.isfile(key_or_path):
+        raise ValueError(
+            f"'{key_or_path}' not found in '{VOICES_JSON_PATH}' and is not a valid file path."
+        )
+    else:
+        print(
+            f"No existing style for '{key_or_path}'. Attempting to compute from audio..."
+        )
+        style_vec = inference.compute_style(key_or_path)
+        if style_vec is None:
+            raise ValueError(f"Failed to compute style vector from '{key_or_path}'.")
+        # Remember the new vector under the file path so later calls find it
+        voices_data[key_or_path] = style_vec.squeeze(0).tolist()
+        save_json(voices_data, VOICES_JSON_PATH)
+        print(
+            f"Computed and saved new style vector for '{key_or_path}' to '{VOICES_JSON_PATH}'."
+        )
+    print(f"Original style vector shape: {style_vec.shape}")
+    # Normalize to two dimensions
+    rank = style_vec.dim()
+    if rank == 1:
+        # Add a leading axis
+        style_vec = style_vec.unsqueeze(0)
+        print(f"Unsqueezed style vector to shape: {style_vec.shape}")
+    elif rank == 3:
+        # Drop axis 1 (only has an effect when that axis has size 1)
+        style_vec = style_vec.squeeze(1)
+        print(f"Squeezed style vector to shape: {style_vec.shape}")
+    elif rank != 2:
+        raise ValueError(
+            f"Unexpected style vector dimensions: {style_vec.shape}. Expected 2D tensor."
+        )
+    print(f"Processed style vector shape: {style_vec.shape}")
+    return style_vec
+def validate_style_vectors(voices_data: dict):
+    """
+    Check that every entry of 'voices_data' has the same length.
+    An empty dictionary is accepted; a message is printed in every
+    non-failing case.
+    Args:
+        voices_data (dict): Dictionary containing style vectors.
+    Raises:
+        ValueError: If inconsistent vector lengths are found.
+    """
+    if voices_data:
+        # More than one distinct length means the entries disagree
+        lengths = {len(entry) for entry in voices_data.values()}
+        if len(lengths) > 1:
+            raise ValueError(
+                f"Inconsistent style vector lengths found: {lengths}. "
+                "All style vectors must have the same dimensionality."
+            )
+        print("All style vectors have consistent lengths.")
+    else:
+        print("No style vectors to validate.")
+def tts_normal(text: str, voice: str, speed: float = 1.2) -> Optional[np.ndarray]:
+    """
+    Synthesize 'text' with the style of a stored voice or a reference recording.
+    The style vector is taken from voices.json when 'voice' is a key there;
+    otherwise, if 'voice' is an existing file, its style vector is computed
+    and stored. The text is split into chunks so long inputs can be handled.
+    Args:
+        text (str): The text to be synthesized.
+        voice (str): Either the key in voices.json or a .wav file path.
+        speed (float): Speed of the generated audio.
+    Returns:
+        Optional[np.ndarray]: Synthesized audio waveform, or None if something fails.
+    """
+    # Read the stored vectors and make sure they agree in length
+    try:
+        known_voices = load_json(VOICES_JSON_PATH)
+        validate_style_vectors(known_voices)
+    except ValueError as load_error:
+        print(f"Error loading/validating '{VOICES_JSON_PATH}': {load_error}")
+        return None
+    # Resolve 'voice' to a style vector
+    try:
+        style = get_or_compute_style_vector(voice, known_voices)
+    except ValueError as lookup_error:
+        print(lookup_error)
+        return None
+    if style is None:
+        print("No style vector found or computed; cannot run TTS.")
+        return None
+    # Normalize the requested speed
+    rate = parse_speed(speed)
+    # Break the text into chunks with txtsplit
+    print("Splitting text into chunks...")
+    pieces = txtsplit(text)
+    print(f"Text split into {len(pieces)} chunks.")
+    # Hand over to the shared synthesis routine
+    return synthesize_audio(text_chunks=pieces, style_vec=style, speed=rate)
+##############################################################################
+# TTS USING A DIRECTLY PROVIDED STYLE VECTOR
+##############################################################################
def tts_with_style_vector(
    text: str,
    style_vec: torch.Tensor,
    speed: float = 1.2,
    alpha: float = 0.3,
    beta: float = 0.7,
    diffusion_steps: int = 7,
    embedding_scale: float = 1.0,
) -> Optional[np.ndarray]:
    """
    Perform TTS synthesis using a *directly provided* style vector.

    The style vector is first normalised to a 2D tensor: a 1D tensor gets a
    leading dimension added and a 3D tensor has dimension 1 squeezed. If the
    result is still not 2D, nothing is synthesized.

    Args:
        text (str): The text to be spoken.
        style_vec (torch.Tensor): A PyTorch tensor representing the style vector.
                                  Should be shape (1, D) if the pipeline expects a batch dimension.
        speed (float): Speed factor for TTS; passed through parse_speed.
        alpha (float): Forwarded unchanged to synthesize_audio.
        beta (float): Forwarded unchanged to synthesize_audio.
        diffusion_steps (int): Forwarded unchanged to synthesize_audio.
        embedding_scale (float): Forwarded unchanged to synthesize_audio.
    Returns:
        Optional[np.ndarray]: The result of synthesize_audio, or None if the
        style vector cannot be brought to a 2D shape.
    """
    # Bring the style vector to 2D where a trivial reshape is possible.
    if style_vec.dim() == 1:
        style_vec = style_vec.unsqueeze(0)  # e.g. (D,) -> (1, D)
        print(f"Unsqueezed style vector to shape: {style_vec.shape}")
    elif style_vec.dim() == 3:
        # squeeze(1) is a no-op when dimension 1 is not of size 1, so the
        # rank is re-checked below rather than assumed.
        style_vec = style_vec.squeeze(1)
        print(f"Squeezed style vector to shape: {style_vec.shape}")
    # Checked after normalisation so that a 3D tensor that could not be
    # squeezed is rejected just like any other unsupported rank.
    if style_vec.dim() != 2:
        print(f"Unexpected style vector shape: {style_vec.shape}. Expected 2D tensor.")
        return None
    print(f"Style vector shape for synthesis: {style_vec.shape}")
    # Parse speed
    speed_val = parse_speed(speed)
    # Split text into manageable chunks using txtsplit
    print("Splitting text into chunks...")
    text_chunks = txtsplit(text)
    print(f"Text split into {len(text_chunks)} chunks.")
    # Synthesize audio using the core function
    full_audio = synthesize_audio(
        text_chunks=text_chunks,
        style_vec=style_vec,
        speed=speed_val,
        alpha=alpha,
        beta=beta,
        diffusion_steps=diffusion_steps,
        embedding_scale=embedding_scale,
    )
    return full_audio
+##############################################################################
+# MAIN CLI
+##############################################################################
def main(argv=None):
    """
    Command-line entry point: synthesize speech and write it to a WAV file.

    With --randomize the audio comes from tts_randomized; otherwise --voice is
    required and the audio comes from tts_normal. The result is converted to
    float32 if needed and written to --output.

    Args:
        argv (Optional[Sequence[str]]): Argument list to parse. Defaults to
            None, in which case argparse reads sys.argv[1:].
    """
    parser = argparse.ArgumentParser(
        description="Script to TTS with either random style sampling or normal style usage."
    )
    parser.add_argument(
        "--text",
        type=str,
        default="Hello from a random style or normal style TTS script!",
        help="Text to be spoken.",
    )
    parser.add_argument(
        "--speed",
        type=str,  # Kept as str so inputs like "120%" reach the TTS functions as typed
        default="1.2",
        # argparse %-formats help strings, so a literal percent sign must be
        # written as '%%'; a bare '%' makes --help / print_help() raise ValueError.
        help="Speed of the generated audio (e.g., '120%%', '1.2').",
    )
    parser.add_argument(
        "--voice",
        type=str,
        default=None,
        help="If not using --randomize, specify a voice key or .wav path to load/compute style.",
    )
    parser.add_argument(
        "--randomize",
        action="store_true",
        help="Use random style sampling from a fitted Gaussian of known styles.",
    )
    parser.add_argument(
        "--output", type=str, default="output.wav", help="Output WAV file name."
    )
    args = parser.parse_args(argv)
    if args.randomize:
        # Approach: random style from distribution
        print("Sampling a new random style vector from 'voices.json' distribution...")
        # NOTE(review): assumes tts_randomized always returns a 2-tuple, also on
        # failure - confirm against its definition.
        audio, _ = tts_randomized(text=args.text, speed=args.speed)
    else:
        # Normal approach: use a style key or fallback
        print("Using normal style approach (loading or computing from 'voices.json').")
        if args.voice is None:
            print("Error: --voice must be specified when not using --randomize.")
            parser.print_help()
            return
        audio = tts_normal(text=args.text, voice=args.voice, speed=args.speed)
    if audio is not None:
        # Ensure audio is a NumPy array of type float32
        if not isinstance(audio, np.ndarray):
            print("Error: Synthesized audio is not a NumPy array.")
            return
        if audio.dtype != np.float32:
            print(f"Converting audio from {audio.dtype} to float32.")
            audio = audio.astype(np.float32)
        # Save the concatenated audio
        try:
            # 24000 is passed as the third positional argument (presumably the
            # sample rate, if `sf` is the soundfile module - confirm in imports).
            sf.write(args.output, audio, 24000)
            print(f"Audio saved to '{args.output}'.")
        except Exception as e:
            # Top-level boundary: report the failure instead of a traceback.
            print(f"Failed to save audio to '{args.output}': {e}")
    else:
        print("No audio was generated. Check logs above for errors.")


if __name__ == "__main__":
    main()

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

voices.json ADDED Viewed

	@@ -0,0 +1,2840 @@

+{
+  "Richard_Male_EN_US": [
+    0.0838528722524643,
+    -0.20531457662582397,
+    -0.10166072100400925,
+    -0.08704791963100433,
+    -0.16887575387954712,
+    0.07563386857509613,
+    -0.11252004653215408,
+    0.0405205562710762,
+    0.17604444921016693,
+    -0.0022975243628025055,
+    0.07261361181735992,
+    0.01948651298880577,
+    0.03549861162900925,
+    0.10135026276111603,
+    -0.028263121843338013,
+    0.002969518303871155,
+    0.0983193963766098,
+    -0.0498846136033535,
+    -0.059726495295763016,
+    0.14428836107254028,
+    0.22363322973251343,
+    -0.18453018367290497,
+    -0.02805022895336151,
+    -0.04543690383434296,
+    -0.11356891691684723,
+    -0.13455606997013092,
+    0.09829540550708771,
+    -0.08688592910766602,
+    0.07672901451587677,
+    -0.18105418980121613,
+    -0.18634817004203796,
+    0.12421728670597076,
+    -0.007412530481815338,
+    -0.05725667625665665,
+    -0.1923050880432129,
+    0.12631717324256897,
+    0.19541586935520172,
+    -0.07617154717445374,
+    -0.08994933217763901,
+    -0.046437621116638184,
+    -0.043646734207868576,
+    -0.14466425776481628,
+    -0.10671895742416382,
+    -0.013705611228942871,
+    -0.207138791680336,
+    -0.752566933631897,
+    -0.1562556028366089,
+    0.15995296835899353,
+    -0.17764419317245483,
+    0.016572676599025726,
+    0.10811036825180054,
+    0.08399468660354614,
+    -0.08008511364459991,
+    -0.03857298195362091,
+    -0.0649266391992569,
+    -0.22515754401683807,
+    0.11427924036979675,
+    -0.24663931131362915,
+    0.1547779142856598,
+    -0.14620377123355865,
+    0.5633630752563477,
+    -0.1662401258945465,
+    -0.09044578671455383,
+    0.004183262586593628,
+    -0.0022140033543109894,
+    -0.04644201323390007,
+    0.09777267277240753,
+    0.007478602230548859,
+    0.09729039669036865,
+    0.014121796935796738,
+    -0.1283886432647705,
+    0.016886277124285698,
+    0.14353325963020325,
+    -0.019632495939731598,
+    -0.08626653999090195,
+    -0.13174456357955933,
+    -0.07509270310401917,
+    0.13293522596359253,
+    -0.006963282823562622,
+    0.04489506036043167,
+    0.1364283263683319,
+    -0.023780206218361855,
+    -0.13672304153442383,
+    0.0834212377667427,
+    -0.0881689265370369,
+    0.12765783071517944,
+    -0.04777761548757553,
+    0.006771944463253021,
+    0.10008896142244339,
+    -0.0004957094788551331,
+    -0.3792557418346405,
+    -0.2995077669620514,
+    -0.18048475682735443,
+    -0.05329251289367676,
+    0.15887054800987244,
+    -0.05502178147435188,
+    -0.002659738063812256,
+    -0.1500413417816162,
+    0.02029288560152054,
+    -0.13041751086711884,
+    -0.02238699235022068,
+    0.13223209977149963,
+    0.008205652236938477,
+    0.09628777951002121,
+    -0.005202248692512512,
+    0.06322506815195084,
+    0.2893131375312805,
+    0.13000035285949707,
+    -0.12980809807777405,
+    -0.19357866048812866,
+    0.010631434619426727,
+    -0.09848842024803162,
+    0.43145453929901123,
+    -0.07535015791654587,
+    0.030972477048635483,
+    -0.005491979420185089,
+    -0.23411263525485992,
+    0.0034233778715133667,
+    -0.03615764528512955,
+    0.11543036997318268,
+    -0.04371267557144165,
+    -0.11525161564350128,
+    0.023816093802452087,
+    0.028308294713497162,
+    -0.0039406418800354,
+    -0.13210836052894592,
+    0.06155262142419815,
+    -0.17223545908927917,
+    -0.07812541723251343,
+    -0.04739491268992424,
+    0.012321650981903076,
+    0.2732156217098236,
+    -0.006376683712005615,
+    0.2102227807044983,
+    -0.026796162128448486,
+    0.023160047829151154,
+    0.4432758688926697,
+    0.05461269989609718,
+    -0.05827128142118454,
+    -0.23424984514713287,
+    0.004663914442062378,
+    0.22082097828388214,
+    0.28713545203208923,
+    -0.08923299610614777,
+    -0.11788474768400192,
+    -0.14981813728809357,
+    0.008348524570465088,
+    0.15658962726593018,
+    0.2602955102920532,
+    -0.32679831981658936,
+    -0.38099929690361023,
+    0.22179332375526428,
+    -0.18678224086761475,
+    0.1026342585682869,
+    0.03508329764008522,
+    -0.0821741446852684,
+    -0.08279386162757874,
+    -0.2976434528827667,
+    -0.4842967391014099,
+    -0.22528287768363953,
+    0.05699944496154785,
+    -0.16488179564476013,
+    -0.15537670254707336,
+    0.17949746549129486,
+    -0.0883849710226059,
+    -0.07729420065879822,
+    -0.011546742171049118,
+    0.055838439613580704,
+    -0.2754920423030853,
+    0.24266156554222107,
+    -0.25005173683166504,
+    0.15594978630542755,
+    -0.2281457483768463,
+    -0.022545501589775085,
+    -0.18430665135383606,
+    -0.09129363298416138,
+    -0.46234992146492004,
+    0.18292169272899628,
+    0.3553994596004486,
+    0.18489143252372742,
+    0.16369718313217163,
+    0.06524109095335007,
+    -0.13083520531654358,
+    -0.004223830997943878,
+    0.5248246192932129,
+    -0.4541511535644531,
+    0.33416515588760376,
+    -0.23953655362129211,
+    0.039670318365097046,
+    -0.2775384485721588,
+    -0.06698162853717804,
+    -0.04875987395644188,
+    0.095211923122406,
+    -0.2521704435348511,
+    -0.1613994985818863,
+    -0.11273781210184097,
+    0.089663565158844,
+    0.4533567726612091,
+    -0.12889298796653748,
+    0.04630730301141739,
+    0.3440477252006531,
+    -0.29735368490219116,
+    -0.16942940652370453,
+    -0.30063536763191223,
+    -0.04887039214372635,
+    0.07650876045227051,
+    -0.24708417057991028,
+    0.11167217791080475,
+    0.41300496459007263,
+    -0.1990889608860016,
+    -0.03628714382648468,
+    0.1947430670261383,
+    0.002185918390750885,
+    -0.1734754890203476,
+    0.007325600832700729,
+    0.0758146345615387,
+    0.14343833923339844,
+    0.16034956276416779,
+    0.09238007664680481,
+    -0.14523102343082428,
+    0.07370740175247192,
+    -0.05756370723247528,
+    0.3471793532371521,
+    -0.09545806050300598,
+    -0.32518574595451355,
+    0.01974405348300934,
+    -0.07413671910762787,
+    -0.07332949340343475,
+    0.09689724445343018,
+    0.33527132868766785,
+    0.07647977769374847,
+    -0.07537016272544861,
+    0.19729986786842346,
+    -0.054191842675209045,
+    0.019159607589244843,
+    0.28645795583724976,
+    -0.0029614195227622986,
+    0.050225719809532166,
+    0.058712102472782135,
+    -0.020884737372398376,
+    0.32063227891921997,
+    0.13905727863311768,
+    -0.08830951899290085,
+    -0.2068493664264679,
+    -0.5085070133209229,
+    -0.19245725870132446,
+    0.07536047697067261,
+    0.3520141541957855,
+    -0.004317941144108772,
+    0.11286243796348572,
+    0.4874182343482971,
+    0.197415292263031,
+    -0.11710592359304428,
+    0.41149505972862244,
+    -0.024338984861969948,
+    -0.3128387928009033
+  ],
+  "Chuck_Male_EN_US": [
+    -0.080739326775074,
+    -0.05186597257852554,
+    -0.04643955081701279,
+    -0.16995930671691895,
+    -0.08520634472370148,
+    0.26354464888572693,
+    -0.16335150599479675,
+    0.03762347623705864,
+    0.12310560047626495,
+    0.11882100999355316,
+    0.25169509649276733,
+    0.16642478108406067,
+    0.04346868768334389,
+    0.05879981815814972,
+    -0.10555227845907211,
+    0.098129041492939,
+    0.07805588096380234,
+    0.07993364334106445,
+    0.02878241240978241,
+    0.10626713931560516,
+    0.18884220719337463,
+    -0.15791675448417664,
+    -0.046336084604263306,
+    -0.06054564565420151,
+    0.14667274057865143,
+    -0.1686663031578064,
+    0.13676004111766815,
+    0.017875753343105316,
+    0.034988172352313995,
+    -0.15061573684215546,
+    -0.06622567772865295,
+    -0.018525442108511925,
+    -0.0815289169549942,
+    -0.11815841495990753,
+    -0.2079353630542755,
+    -0.12242597341537476,
+    0.1685279756784439,
+    0.005029462277889252,
+    -0.04868243634700775,
+    -0.006423652172088623,
+    -0.03062119334936142,
+    -0.10892745107412338,
+    -0.029393166303634644,
+    -0.14687927067279816,
+    -0.10593554377555847,
+    -0.7461926937103271,
+    -0.1311473250389099,
+    0.19617816805839539,
+    0.0034797536209225655,
+    0.017870396375656128,
+    0.1417236328125,
+    0.14160263538360596,
+    -0.13305433094501495,
+    -0.019239917397499084,
+    0.0768553614616394,
+    -0.2461501657962799,
+    0.11662117391824722,
+    -0.3004859685897827,
+    0.07660801708698273,
+    -0.08291581273078918,
+    0.646587610244751,
+    0.05630920082330704,
+    0.018290594220161438,
+    -0.076407790184021,
+    -0.022635802626609802,
+    -0.033361952751874924,
+    0.13813386857509613,
+    0.23338472843170166,
+    0.1526336669921875,
+    -0.010544595308601856,
+    0.0645538792014122,
+    -0.12352045625448227,
+    0.23500092327594757,
+    0.023993253707885742,
+    -0.18476025760173798,
+    0.08681316673755646,
+    -0.24495398998260498,
+    0.1805841475725174,
+    0.019407637417316437,
+    -0.1164683923125267,
+    0.04219061881303787,
+    -0.05617094039916992,
+    -0.06486696004867554,
+    0.11154982447624207,
+    0.1624276340007782,
+    0.037209782749414444,
+    0.024621259421110153,
+    -0.011263281106948853,
+    0.07437050342559814,
+    -0.027852090075612068,
+    -0.38197386264801025,
+    -0.12534263730049133,
+    -0.15090727806091309,
+    -0.016294121742248535,
+    0.20195062458515167,
+    -0.10578630119562149,
+    0.0834207683801651,
+    -0.2002831995487213,
+    0.08801543712615967,
+    -0.1744777411222458,
+    -0.019703160971403122,
+    0.18256394565105438,
+    0.019218653440475464,
+    -0.06123323366045952,
+    0.03127707168459892,
+    0.1301468461751938,
+    0.2009834200143814,
+    0.07228143513202667,
+    -0.05553338676691055,
+    -0.44165605306625366,
+    0.27527377009391785,
+    -0.08356539905071259,
+    0.5484572649002075,
+    -0.22545355558395386,
+    0.10192038118839264,
+    0.13001194596290588,
+    -0.1598789542913437,
+    0.03745634853839874,
+    0.1973172128200531,
+    0.10266508162021637,
+    -0.10606272518634796,
+    -0.043102242052555084,
+    0.023647010326385498,
+    0.0016689617186784744,
+    0.35201117396354675,
+    -0.24116483330726624,
+    0.022700302302837372,
+    -0.29655206203460693,
+    -0.11883702874183655,
+    0.06606853753328323,
+    -0.09965553879737854,
+    0.3298850655555725,
+    -0.10845916718244553,
+    0.12123875319957733,
+    -0.2904745042324066,
+    0.16466909646987915,
+    0.5250499844551086,
+    0.26844123005867004,
+    -0.1581430584192276,
+    -0.01679089665412903,
+    0.10683909058570862,
+    0.13829728960990906,
+    -0.0774451196193695,
+    -0.09715490788221359,
+    -0.15488898754119873,
+    0.09843119978904724,
+    0.18665491044521332,
+    0.499192476272583,
+    0.25495558977127075,
+    -0.3385838270187378,
+    -0.11540469527244568,
+    0.3943556845188141,
+    -0.028287045657634735,
+    0.10370328277349472,
+    0.3033093214035034,
+    -0.13608571887016296,
+    -0.07120000571012497,
+    -0.14757874608039856,
+    -0.2092522382736206,
+    -0.349817156791687,
+    -0.042082756757736206,
+    -0.28092268109321594,
+    -0.3474852740764618,
+    -0.1188138797879219,
+    -0.3396819233894348,
+    0.16733011603355408,
+    0.09606117755174637,
+    0.10766048729419708,
+    -0.19903156161308289,
+    -0.30723923444747925,
+    -0.3617871105670929,
+    -0.24389447271823883,
+    -0.33159559965133667,
+    -0.0794874057173729,
+    -0.17680421471595764,
+    -0.09732476621866226,
+    -0.6020764708518982,
+    -0.23046669363975525,
+    0.4663076400756836,
+    -0.03177022933959961,
+    0.31536608934402466,
+    0.17449232935905457,
+    0.19207462668418884,
+    -0.20476028323173523,
+    0.3841668367385864,
+    -0.46485692262649536,
+    0.07715408504009247,
+    -0.0481671467423439,
+    0.06056290864944458,
+    0.23023973405361176,
+    -0.44577276706695557,
+    0.08608794212341309,
+    0.174083411693573,
+    -0.20057682693004608,
+    0.0015126615762710571,
+    0.30561572313308716,
+    -0.0823325663805008,
+    0.6210863590240479,
+    0.12966740131378174,
+    -0.27340665459632874,
+    0.31147849559783936,
+    0.17376896739006042,
+    -0.28636378049850464,
+    0.1761811077594757,
+    -0.3044331669807434,
+    0.0771920382976532,
+    -0.04131172597408295,
+    0.17433065176010132,
+    0.11375144124031067,
+    -0.06075088679790497,
+    -0.21101467311382294,
+    0.23215331137180328,
+    0.3220982849597931,
+    -0.08772740513086319,
+    -0.1110156923532486,
+    0.150890052318573,
+    0.0899096205830574,
+    -0.11286906898021698,
+    -0.18477720022201538,
+    0.12096066772937775,
+    0.33322685956954956,
+    -0.2950510084629059,
+    0.2563823163509369,
+    0.11760752648115158,
+    -0.3458101749420166,
+    -0.4250616133213043,
+    -0.25533783435821533,
+    -0.2633964717388153,
+    -0.026663780212402344,
+    0.4655682444572449,
+    0.3740382790565491,
+    -0.05553853511810303,
+    0.024137284606695175,
+    -0.044697582721710205,
+    0.03481140360236168,
+    0.01849237084388733,
+    -0.15648233890533447,
+    -0.719332218170166,
+    0.5206979513168335,
+    -0.022456303238868713,
+    0.6854866743087769,
+    -0.32744836807250977,
+    -0.08906684815883636,
+    -0.02081950753927231,
+    -0.8612825870513916,
+    -0.1892240047454834,
+    0.07316698133945465,
+    0.4908924400806427,
+    0.30862411856651306,
+    0.3830990791320801,
+    0.38602370023727417,
+    0.25254443287849426,
+    0.26230084896087646,
+    0.12000225484371185,
+    0.0641913115978241,
+    -0.5113836526870728
+  ],
+  "Sol_Female_EN_US": [
+    0.1268293261528015,
+    -0.24892280995845795,
+    0.03928159922361374,
+    -0.08916330337524414,
+    -0.08921554684638977,
+    0.018120769411325455,
+    0.009445525705814362,
+    0.09456969052553177,
+    0.23499509692192078,
+    0.12589207291603088,
+    0.0817081481218338,
+    -0.05610091611742973,
+    -0.11433179676532745,
+    0.031890347599983215,
+    0.01497705653309822,
+    0.10599376261234283,
+    0.03902814909815788,
+    0.01317581906914711,
+    0.008249595761299133,
+    0.010834900662302971,
+    0.1323947310447693,
+    -0.14897435903549194,
+    -0.044409990310668945,
+    -0.004388481378555298,
+    -0.02122711017727852,
+    -0.21078309416770935,
+    0.05238814279437065,
+    -0.24263539910316467,
+    0.10478609800338745,
+    -0.046628206968307495,
+    -0.061156079173088074,
+    0.04726453870534897,
+    0.19356507062911987,
+    -0.10425321012735367,
+    -0.1245705783367157,
+    0.2371465265750885,
+    0.15406547486782074,
+    -0.11537078022956848,
+    -0.2574460506439209,
+    0.11589224636554718,
+    0.04982087016105652,
+    -0.0768856406211853,
+    -0.11789155006408691,
+    -0.13019400835037231,
+    0.03559808060526848,
+    -0.47099581360816956,
+    0.06938941776752472,
+    0.19138163328170776,
+    0.17706745862960815,
+    0.035381563007831573,
+    0.09636449813842773,
+    0.07912801951169968,
+    0.06765618175268173,
+    -0.1303500384092331,
+    -0.039963360875844955,
+    -0.04088369756937027,
+    0.02034657448530197,
+    0.008391611278057098,
+    0.02184874564409256,
+    -0.03052680939435959,
+    0.3498419225215912,
+    -0.07705945521593094,
+    -0.2935195565223694,
+    0.034476667642593384,
+    -0.1314329355955124,
+    0.20076632499694824,
+    0.016021449118852615,
+    0.23033341765403748,
+    -0.03349122032523155,
+    -0.18335162103176117,
+    0.029580311849713326,
+    0.018869629129767418,
+    0.10253989696502686,
+    -0.09266053140163422,
+    -0.03108178824186325,
+    -0.03976592794060707,
+    0.13201536238193512,
+    -0.028312936425209045,
+    -0.09032510221004486,
+    0.05712374672293663,
+    -0.17886731028556824,
+    -0.00012268498539924622,
+    -0.17655304074287415,
+    0.21560686826705933,
+    0.07977418601512909,
+    0.09157729148864746,
+    -0.08235643059015274,
+    -0.034677520394325256,
+    0.2231934666633606,
+    0.1851099133491516,
+    -0.2730552554130554,
+    -0.2409580945968628,
+    -0.273377925157547,
+    -0.11498671770095825,
+    0.29265373945236206,
+    -0.10599346458911896,
+    -0.05672678351402283,
+    0.026578396558761597,
+    -0.22945210337638855,
+    -0.08645745366811752,
+    0.028000690042972565,
+    -0.13934218883514404,
+    0.11353091895580292,
+    0.060757409781217575,
+    0.11343018710613251,
+    0.053218141198158264,
+    0.3181232810020447,
+    0.10948897153139114,
+    0.0357043594121933,
+    -0.1203552708029747,
+    0.11475016921758652,
+    -0.005062885582447052,
+    0.3342074751853943,
+    -0.1266603022813797,
+    0.07479999959468842,
+    -0.008454116061329842,
+    0.12023192644119263,
+    -0.03595118224620819,
+    0.02898475155234337,
+    -0.020386993885040283,
+    0.006668185815215111,
+    -0.15364103019237518,
+    -0.11951534450054169,
+    -0.0910012498497963,
+    0.19956853985786438,
+    0.04014497250318527,
+    -0.09457655251026154,
+    -0.12396776676177979,
+    0.23229674994945526,
+    -0.15745335817337036,
+    0.17193259298801422,
+    0.040711648762226105,
+    -0.12352880835533142,
+    0.018167633563280106,
+    -0.08081409335136414,
+    0.23432570695877075,
+    0.17171189188957214,
+    -0.03221336752176285,
+    0.03773265331983566,
+    -0.06490489095449448,
+    -0.030414387583732605,
+    0.4086611866950989,
+    0.07678371667861938,
+    0.15471185743808746,
+    0.009691998362541199,
+    0.21592354774475098,
+    0.16220787167549133,
+    0.13170170783996582,
+    0.11527039110660553,
+    -0.3844143748283386,
+    0.0421525314450264,
+    0.4349702298641205,
+    -0.1686660647392273,
+    0.005835492163896561,
+    -0.05163434147834778,
+    -0.38664859533309937,
+    0.09356559813022614,
+    -0.2766155004501343,
+    -0.13494873046875,
+    -0.07143319398164749,
+    -0.0797828882932663,
+    -0.10624134540557861,
+    -0.05675575137138367,
+    0.2754574418067932,
+    0.11232379078865051,
+    -0.026216700673103333,
+    -0.37042930722236633,
+    0.04595255106687546,
+    -0.08378663659095764,
+    0.113258957862854,
+    -0.10497808456420898,
+    -0.3882599174976349,
+    -0.09268787503242493,
+    -0.009513184428215027,
+    -0.03547880798578262,
+    -0.11325360834598541,
+    -0.4920811951160431,
+    -0.2420617938041687,
+    0.004631944000720978,
+    0.3054035007953644,
+    0.12272718548774719,
+    -0.1861076056957245,
+    -0.1328718364238739,
+    0.22628089785575867,
+    0.1674436330795288,
+    -0.2189907729625702,
+    0.25414198637008667,
+    0.08179888129234314,
+    0.014794200658798218,
+    -0.45081019401550293,
+    -0.4995046854019165,
+    -0.0721922218799591,
+    0.20731398463249207,
+    -0.07364560663700104,
+    -0.17112991213798523,
+    0.20308616757392883,
+    0.0781199038028717,
+    0.20510229468345642,
+    -0.18790192902088165,
+    0.08215056359767914,
+    0.05191810801625252,
+    -0.15418048202991486,
+    -0.1164349764585495,
+    -0.30107319355010986,
+    -0.07877662777900696,
+    0.006951943039894104,
+    -0.2136976420879364,
+    0.18753382563591003,
+    0.1558315008878708,
+    0.03319445252418518,
+    -0.20069114863872528,
+    0.5186187028884888,
+    0.29910457134246826,
+    -0.022099845111370087,
+    -0.2004503756761551,
+    0.11575216799974442,
+    0.06575708091259003,
+    0.29491111636161804,
+    0.042733918875455856,
+    0.13065889477729797,
+    -0.025842148810625076,
+    -0.48179322481155396,
+    0.12712322175502777,
+    -0.22928954660892487,
+    -0.4731486141681671,
+    0.2035326510667801,
+    -0.33841538429260254,
+    -0.09808406233787537,
+    0.30838146805763245,
+    0.06581465899944305,
+    0.047930970788002014,
+    0.01692097634077072,
+    0.22469750046730042,
+    -0.05486059561371803,
+    0.35013893246650696,
+    -0.283150851726532,
+    0.05401553213596344,
+    -0.04293721914291382,
+    0.03238523006439209,
+    0.30903106927871704,
+    0.318570613861084,
+    -0.36268168687820435,
+    0.01699633151292801,
+    -0.122194804251194,
+    -0.08210300654172897,
+    -0.08749544620513916,
+    0.04085458070039749,
+    0.26824674010276794,
+    -0.20407041907310486,
+    0.3028109669685364,
+    0.11649337410926819,
+    -0.06361576169729233,
+    0.022716812789440155,
+    0.8145036101341248,
+    -0.001978829503059387,
+    -0.19634583592414856
+  ],
+  "Georgia_Female_EN_US": [
+    0.14149390161037445,
+    -0.19759099185466766,
+    0.029538815841078758,
+    -0.1644008457660675,
+    -0.16974563896656036,
+    0.15899056196212769,
+    -0.08187974989414215,
+    0.06346520036458969,
+    0.171818345785141,
+    -0.03900427371263504,
+    0.08924897015094757,
+    0.11517727375030518,
+    -0.09470553696155548,
+    0.039182037115097046,
+    -0.0800875872373581,
+    0.027626454830169678,
+    0.057931605726480484,
+    -0.05594071373343468,
+    -0.01764649897813797,
+    0.1859845221042633,
+    0.19512777030467987,
+    -0.2715531587600708,
+    -0.15435153245925903,
+    -0.07994608581066132,
+    0.0034161433577537537,
+    -0.27405399084091187,
+    0.06616479158401489,
+    0.028649676591157913,
+    0.24419546127319336,
+    -0.053172968327999115,
+    -0.06803376972675323,
+    0.08285264670848846,
+    -0.03827327489852905,
+    -0.05404618754982948,
+    -0.1717120110988617,
+    0.0565122552216053,
+    0.12560471892356873,
+    -0.07519722729921341,
+    -0.005836378782987595,
+    -0.049631841480731964,
+    0.035924024879932404,
+    -0.20555508136749268,
+    -0.16342787444591522,
+    -0.011107422411441803,
+    -0.09510314464569092,
+    -0.8373715877532959,
+    -0.056464750319719315,
+    0.15504246950149536,
+    0.12261460721492767,
+    -0.002536684274673462,
+    0.14500755071640015,
+    0.17729829251766205,
+    -0.16478273272514343,
+    -0.07822693139314651,
+    0.03328864276409149,
+    -0.3484482765197754,
+    0.07604808360338211,
+    -0.22294224798679352,
+    0.06523670256137848,
+    -0.22709456086158752,
+    0.8876799941062927,
+    0.0027947500348091125,
+    0.0007318109273910522,
+    0.002863973379135132,
+    -0.21034874022006989,
+    0.051948100328445435,
+    -0.004550091922283173,
+    0.17473770678043365,
+    0.1153031662106514,
+    -0.09051527082920074,
+    -0.07489325851202011,
+    0.03644700348377228,
+    0.1395515352487564,
+    -0.010498672723770142,
+    -0.16194367408752441,
+    0.11820540577173233,
+    -0.1125202625989914,
+    0.07222796976566315,
+    0.0924602597951889,
+    0.009883157908916473,
+    0.14233753085136414,
+    -0.04211493209004402,
+    -0.09790381044149399,
+    0.1432836949825287,
+    -0.0207438375800848,
+    0.09433138370513916,
+    0.03480076417326927,
+    0.014073198661208153,
+    0.1459684669971466,
+    0.06838452816009521,
+    -0.4587509036064148,
+    -0.24484041333198547,
+    -0.13059453666210175,
+    -0.014020655304193497,
+    -0.04615045711398125,
+    -0.10020460933446884,
+    0.05875978618860245,
+    -0.11167144775390625,
+    -0.08788008987903595,
+    -0.06586126983165741,
+    0.0656682550907135,
+    0.06709162890911102,
+    0.02795044332742691,
+    0.11588016897439957,
+    -0.09147179126739502,
+    0.08282454311847687,
+    0.19108889997005463,
+    -0.09372390806674957,
+    -0.0004408508539199829,
+    -0.40825721621513367,
+    0.24378983676433563,
+    0.06450286507606506,
+    0.40147995948791504,
+    -0.12383461743593216,
+    0.09264419227838516,
+    0.04705287888646126,
+    -0.0979108139872551,
+    -0.04610448330640793,
+    0.06577446311712265,
+    0.06107745319604874,
+    -0.0739186629652977,
+    0.03969721123576164,
+    0.0321660116314888,
+    0.2023421972990036,
+    0.22365602850914001,
+    -0.33337128162384033,
+    0.10086256265640259,
+    -0.23017814755439758,
+    0.15227298438549042,
+    0.08262811601161957,
+    0.028533905744552612,
+    0.16887661814689636,
+    -0.1553392857313156,
+    0.04320569336414337,
+    -0.18707242608070374,
+    0.021115079522132874,
+    0.3647507131099701,
+    0.2119525820016861,
+    -0.02559354156255722,
+    0.268862247467041,
+    -0.03270912170410156,
+    0.01871364563703537,
+    -0.0919923335313797,
+    -0.13874371349811554,
+    -0.092261902987957,
+    0.0468045249581337,
+    0.29371997714042664,
+    0.21063821017742157,
+    -0.12585729360580444,
+    -0.4214266538619995,
+    -0.17777106165885925,
+    0.14042110741138458,
+    -0.1407075822353363,
+    -0.1934659481048584,
+    0.015365049242973328,
+    -0.12806877493858337,
+    -0.01690494269132614,
+    -0.2808881402015686,
+    -0.32276445627212524,
+    -0.04267498850822449,
+    0.04772596061229706,
+    -0.13011249899864197,
+    -0.4758068323135376,
+    0.21355567872524261,
+    -0.12164445221424103,
+    -0.10112264752388,
+    -0.0498490147292614,
+    0.2474687546491623,
+    -0.40088728070259094,
+    -0.21887987852096558,
+    -0.4579368829727173,
+    -0.21036852896213531,
+    -0.18377023935317993,
+    -0.23978865146636963,
+    -0.15847837924957275,
+    -0.36417555809020996,
+    -0.1878042072057724,
+    -0.12206757068634033,
+    0.4226543605327606,
+    -0.00703008845448494,
+    0.17988801002502441,
+    0.235824853181839,
+    0.0072716958820819855,
+    -0.022622771561145782,
+    0.4673866033554077,
+    -0.4320169687271118,
+    0.27932173013687134,
+    -0.1372895985841751,
+    0.13946086168289185,
+    -0.011557169258594513,
+    -0.11092820018529892,
+    -0.0025858357548713684,
+    0.06566678732633591,
+    -0.25665807723999023,
+    -0.2400297075510025,
+    0.055859118700027466,
+    -0.24934203922748566,
+    -0.05649476498365402,
+    -0.021823860704898834,
+    0.07491856813430786,
+    0.028743356466293335,
+    0.21002137660980225,
+    -0.5215728282928467,
+    0.05622958019375801,
+    -0.2222532033920288,
+    0.1794230341911316,
+    0.11855436116456985,
+    0.14668777585029602,
+    0.45487338304519653,
+    -0.1859143078327179,
+    -0.05654382333159447,
+    -0.16731679439544678,
+    -0.1562391221523285,
+    0.16424456238746643,
+    0.2154158502817154,
+    0.3380601406097412,
+    0.12264357507228851,
+    0.3392817974090576,
+    -0.060549046844244,
+    -0.14765986800193787,
+    0.11267101764678955,
+    -0.24056652188301086,
+    0.03510596603155136,
+    0.10618807375431061,
+    -0.15641556680202484,
+    -0.24543322622776031,
+    -0.19173413515090942,
+    -0.011205855756998062,
+    0.24790503084659576,
+    0.32398396730422974,
+    0.22276073694229126,
+    0.018482085317373276,
+    -0.03579630330204964,
+    0.05034150183200836,
+    0.29536929726600647,
+    -0.050280749797821045,
+    -0.014656215906143188,
+    -0.3677038550376892,
+    0.41170692443847656,
+    0.15874658524990082,
+    0.34870871901512146,
+    -0.23689004778862,
+    0.2970763146877289,
+    0.0950806587934494,
+    -0.1269141584634781,
+    -0.11931035667657852,
+    0.13633345067501068,
+    0.42843636870384216,
+    0.03449300676584244,
+    0.4283212721347809,
+    0.2762477397918701,
+    0.1679811179637909,
+    0.2898493707180023,
+    -0.04722334071993828,
+    -0.047664619982242584,
+    -0.22933121025562286
+  ],
+  "Marry_Female_EN_US": [
+    0.10095467418432236,
+    0.046844299882650375,
+    0.05421638488769531,
+    -0.09417131543159485,
+    -0.18054454028606415,
+    0.0935884565114975,
+    -0.11312611401081085,
+    0.02784895896911621,
+    0.13980317115783691,
+    -0.08165936917066574,
+    0.10532249510288239,
+    0.09783805906772614,
+    0.01645722985267639,
+    0.04216833412647247,
+    -0.1025347113609314,
+    0.09854228794574738,
+    0.22359934449195862,
+    0.08323220163583755,
+    0.003406684845685959,
+    0.30394530296325684,
+    0.3451034426689148,
+    -0.29881906509399414,
+    -0.08311712741851807,
+    -0.109955795109272,
+    0.07522188872098923,
+    -0.38127946853637695,
+    0.029290571808815002,
+    -0.012949233874678612,
+    0.22986799478530884,
+    -0.22929272055625916,
+    -0.11333343386650085,
+    0.1066955029964447,
+    -0.03432668745517731,
+    -0.10237376391887665,
+    -0.11407271027565002,
+    -0.01221979409456253,
+    0.19828736782073975,
+    -0.08432801812887192,
+    -0.07885870337486267,
+    -0.09633795917034149,
+    0.07740725576877594,
+    -0.14024686813354492,
+    0.007659006863832474,
+    -0.061528440564870834,
+    -0.15116117894649506,
+    -0.9346913695335388,
+    -0.19321316480636597,
+    0.09346023201942444,
+    0.008720653131604195,
+    0.00935569778084755,
+    0.059522844851017,
+    -0.0004963874816894531,
+    -0.11127720773220062,
+    -0.015941940248012543,
+    0.11759459227323532,
+    -0.38565748929977417,
+    0.014210086315870285,
+    -0.4402802586555481,
+    -0.03058554232120514,
+    -0.15320685505867004,
+    0.925262451171875,
+    0.05237797647714615,
+    -0.06457516551017761,
+    0.04277027025818825,
+    -0.09071764349937439,
+    -0.023430675268173218,
+    0.018660694360733032,
+    0.28416356444358826,
+    0.15927383303642273,
+    -0.036094918847084045,
+    -0.18289241194725037,
+    -0.16174408793449402,
+    0.1352432817220688,
+    -0.11155793070793152,
+    -0.21458107233047485,
+    -0.007756996899843216,
+    -0.17188167572021484,
+    -0.014599844813346863,
+    0.03282542526721954,
+    0.10045303404331207,
+    0.11301460862159729,
+    -0.04795491322875023,
+    -0.05172593891620636,
+    0.11332973837852478,
+    0.07555423676967621,
+    0.10994540899991989,
+    -0.07060486078262329,
+    0.004575258120894432,
+    0.11668689548969269,
+    0.10401762276887894,
+    -0.6524990797042847,
+    -0.2616415023803711,
+    -0.2577282190322876,
+    0.013369720429182053,
+    0.015914201736450195,
+    -0.1528300940990448,
+    0.06751468777656555,
+    -0.2593517601490021,
+    -0.04114726930856705,
+    -0.12167482078075409,
+    -0.016297057271003723,
+    0.050480689853429794,
+    0.08136023581027985,
+    0.10589707642793655,
+    0.0009140158072113991,
+    0.09637215733528137,
+    0.13627931475639343,
+    -0.08097885549068451,
+    -0.04615870863199234,
+    -0.3712182939052582,
+    0.17477694153785706,
+    -0.03713107109069824,
+    0.3938117027282715,
+    -0.16526120901107788,
+    0.046960875391960144,
+    0.13634932041168213,
+    -0.2535812258720398,
+    -0.004662476480007172,
+    -0.1287195086479187,
+    0.04682536423206329,
+    -0.0553663894534111,
+    -0.007208423689007759,
+    -0.03398251533508301,
+    0.04032691568136215,
+    0.10826413333415985,
+    -0.3297663927078247,
+    0.12369295954704285,
+    -0.22297564148902893,
+    0.016204068437218666,
+    0.22467367351055145,
+    0.06251698732376099,
+    0.25536197423934937,
+    -0.0313156396150589,
+    0.23897168040275574,
+    -0.125069260597229,
+    0.05682749301195145,
+    0.2709246873855591,
+    0.11623440682888031,
+    -0.08916947990655899,
+    -0.0015965849161148071,
+    0.021189596503973007,
+    0.1729092001914978,
+    -0.20169132947921753,
+    -0.010327596217393875,
+    -0.036886122077703476,
+    0.01917070895433426,
+    0.18902111053466797,
+    0.5179728269577026,
+    0.31896597146987915,
+    -0.7427007555961609,
+    -0.4137954115867615,
+    0.06960596889257431,
+    0.06620097160339355,
+    -0.1536514014005661,
+    0.1503698229789734,
+    -0.14556577801704407,
+    0.14588545262813568,
+    -0.18597960472106934,
+    -0.342746764421463,
+    0.05013357102870941,
+    -0.02868656814098358,
+    -0.26822707056999207,
+    -0.3400660753250122,
+    -0.02838587388396263,
+    0.004168674349784851,
+    -0.17664480209350586,
+    0.020639866590499878,
+    0.14682283997535706,
+    -0.2350459098815918,
+    -0.19191408157348633,
+    -0.1417776346206665,
+    -0.15922360122203827,
+    -0.2131480872631073,
+    -0.07812295854091644,
+    -0.22641682624816895,
+    -0.08451198041439056,
+    -0.27697646617889404,
+    -0.16417096555233002,
+    0.17140953242778778,
+    -0.014865081757307053,
+    0.1978265941143036,
+    0.17148619890213013,
+    0.051121003925800323,
+    -0.0503043532371521,
+    0.4206354022026062,
+    -0.6276400089263916,
+    0.19541236758232117,
+    -0.05150662735104561,
+    0.20107480883598328,
+    0.30449116230010986,
+    -0.2169422060251236,
+    0.07420805841684341,
+    0.19388242065906525,
+    -0.09795433282852173,
+    -0.3506614565849304,
+    0.29341161251068115,
+    0.036286331713199615,
+    0.2008945643901825,
+    -0.13470220565795898,
+    -0.3342384099960327,
+    0.049659550189971924,
+    -0.23403503000736237,
+    -0.2438443899154663,
+    -0.0661768913269043,
+    -0.15710312128067017,
+    0.06331466138362885,
+    0.1287412941455841,
+    0.2185135930776596,
+    0.2592580318450928,
+    -0.25041234493255615,
+    0.04421650990843773,
+    0.031913772225379944,
+    0.1675594300031662,
+    0.1488073468208313,
+    -0.14290811121463776,
+    0.3285759687423706,
+    0.22858765721321106,
+    0.3819029927253723,
+    -0.04735409840941429,
+    -0.18470162153244019,
+    0.2156306505203247,
+    -0.262035995721817,
+    0.1106458529829979,
+    -0.33096078038215637,
+    -0.40564554929733276,
+    -0.1130962148308754,
+    -0.4035260081291199,
+    0.009994406253099442,
+    0.19823773205280304,
+    0.09995241463184357,
+    0.10737255960702896,
+    0.16501155495643616,
+    -0.21067723631858826,
+    -0.011145517230033875,
+    0.013040252029895782,
+    -0.3106451630592346,
+    -0.048852063715457916,
+    -0.2291208952665329,
+    0.28707051277160645,
+    0.11026108264923096,
+    0.5360386967658997,
+    -0.1761239767074585,
+    0.07656016945838928,
+    -0.07331065833568573,
+    -0.47247397899627686,
+    -0.21432432532310486,
+    -0.21592572331428528,
+    0.6710861921310425,
+    0.11024707555770874,
+    0.19684234261512756,
+    0.2528229355812073,
+    0.21830880641937256,
+    0.1830369234085083,
+    0.07172520458698273,
+    0.24994215369224548,
+    -0.14005254209041595
+  ],
+  "Samuel_Male_EN_US": [
+    -0.12619435787200928,
+    -0.11846257001161575,
+    0.04108911007642746,
+    -0.10919006168842316,
+    -0.18582119047641754,
+    0.3603861629962921,
+    -0.08595605194568634,
+    -0.02000698447227478,
+    0.19657589495182037,
+    0.1481103152036667,
+    0.15841630101203918,
+    0.13725560903549194,
+    -0.023550238460302353,
+    0.11064086854457855,
+    0.0004522055387496948,
+    0.039599962532520294,
+    0.03390733152627945,
+    -0.0010563544929027557,
+    -0.06491883099079132,
+    0.21764393150806427,
+    0.16938678920269012,
+    0.11513420194387436,
+    0.08827359974384308,
+    -0.0926792100071907,
+    0.14648687839508057,
+    -0.21553286910057068,
+    0.023113107308745384,
+    -0.121593177318573,
+    0.11240999400615692,
+    -0.12347493320703506,
+    -0.18039049208164215,
+    0.1588599681854248,
+    -0.17081257700920105,
+    -0.2037820667028427,
+    -0.1563880443572998,
+    -0.0917324647307396,
+    0.03558758646249771,
+    -0.11815845966339111,
+    -0.20688572525978088,
+    -0.0739545151591301,
+    -0.0853046327829361,
+    -0.21343617141246796,
+    0.04951489716768265,
+    -0.44510698318481445,
+    -0.09933532774448395,
+    -0.8458589911460876,
+    -0.16738075017929077,
+    0.16498824954032898,
+    0.01717434823513031,
+    -0.06231179088354111,
+    0.16252321004867554,
+    0.04281383752822876,
+    -0.12609757483005524,
+    0.05567052215337753,
+    -0.027309387922286987,
+    -0.3515397310256958,
+    0.04615774750709534,
+    -0.6998573541641235,
+    0.06659585237503052,
+    -0.13769188523292542,
+    0.6031708717346191,
+    0.011859655380249023,
+    -0.33847033977508545,
+    -0.12553080916404724,
+    -0.18536464869976044,
+    -0.007513280957937241,
+    0.519316554069519,
+    0.23655596375465393,
+    0.24828562140464783,
+    0.13927540183067322,
+    0.14041166007518768,
+    -0.09467436373233795,
+    0.11043473333120346,
+    -0.06588282436132431,
+    -0.11430786550045013,
+    0.026140939444303513,
+    -0.2597509026527405,
+    0.24612551927566528,
+    0.10610029101371765,
+    -0.23051029443740845,
+    -0.2547153830528259,
+    -0.002995643764734268,
+    0.17297737300395966,
+    0.03597598150372505,
+    0.15152215957641602,
+    -0.06193822994828224,
+    -0.07088689506053925,
+    0.03826475143432617,
+    0.08488129824399948,
+    0.2690422534942627,
+    -0.5332049131393433,
+    -0.2220773696899414,
+    -0.5455571413040161,
+    -0.09902779757976532,
+    0.21370843052864075,
+    -0.14772546291351318,
+    0.17388656735420227,
+    -0.12971428036689758,
+    0.019031524658203125,
+    -0.03160820156335831,
+    0.002624817192554474,
+    0.023578234016895294,
+    0.12251117080450058,
+    -0.0782250463962555,
+    0.03130299225449562,
+    0.11962416768074036,
+    0.23477508127689362,
+    0.07299475371837616,
+    -0.0542200468480587,
+    -0.4101184606552124,
+    0.08486519753932953,
+    -0.04950159043073654,
+    0.44324159622192383,
+    -0.20634889602661133,
+    0.059250980615615845,
+    -0.016344502568244934,
+    -0.10346954315900803,
+    0.05175183713436127,
+    0.10785773396492004,
+    -0.13179540634155273,
+    -0.21312423050403595,
+    0.08348912745714188,
+    -0.1100846529006958,
+    -0.0912843644618988,
+    0.16458404064178467,
+    -0.22458869218826294,
+    0.057109322398900986,
+    -0.14130070805549622,
+    -0.27792462706565857,
+    0.2201579511165619,
+    0.14192339777946472,
+    0.370261013507843,
+    -0.1133350059390068,
+    0.5210106372833252,
+    -0.2802170515060425,
+    0.2284509539604187,
+    0.08346766233444214,
+    0.2824746072292328,
+    -0.2277216911315918,
+    0.28492996096611023,
+    0.13533133268356323,
+    0.17641571164131165,
+    0.055212635546922684,
+    -0.1420554518699646,
+    -0.3370305895805359,
+    -0.05939646065235138,
+    -0.012498218566179276,
+    0.3336101472377777,
+    -0.009854704141616821,
+    -0.21461555361747742,
+    -0.06897716224193573,
+    0.42768532037734985,
+    0.2918033301830292,
+    0.17972910404205322,
+    0.08387543261051178,
+    -0.3834030032157898,
+    0.04259374737739563,
+    -0.24948984384536743,
+    -0.4711057245731354,
+    0.23269453644752502,
+    -0.11432869732379913,
+    -0.3319881558418274,
+    -0.5810990333557129,
+    -0.16500617563724518,
+    -0.513327419757843,
+    -0.22280623018741608,
+    -0.06446528434753418,
+    0.053828924894332886,
+    -0.3869067430496216,
+    -0.18398475646972656,
+    -0.39183861017227173,
+    -0.1645130217075348,
+    -0.10562220215797424,
+    0.09369634836912155,
+    -0.10099713504314423,
+    -0.2670536935329437,
+    -0.3405263423919678,
+    -0.4056051969528198,
+    0.4251013398170471,
+    -0.06249173730611801,
+    0.045685671269893646,
+    0.4523245692253113,
+    0.03759829327464104,
+    -0.17996317148208618,
+    0.4454271197319031,
+    -0.9131506681442261,
+    0.12587328255176544,
+    0.03575079143047333,
+    0.1569804549217224,
+    -0.005473516881465912,
+    -0.20985543727874756,
+    0.02623889595270157,
+    0.27007320523262024,
+    -0.10873401910066605,
+    0.05130569636821747,
+    -0.0011592544615268707,
+    -0.0032558459788560867,
+    0.3350878059864044,
+    0.14362351596355438,
+    -0.046291008591651917,
+    0.1339847445487976,
+    0.054939839988946915,
+    -0.20351824164390564,
+    0.13673129677772522,
+    -0.5390269160270691,
+    0.20881281793117523,
+    -0.12060403823852539,
+    0.08859001100063324,
+    -0.025972001254558563,
+    -0.4430112838745117,
+    -0.004151973873376846,
+    0.18464192748069763,
+    0.3526911437511444,
+    -0.09955041110515594,
+    0.10740470886230469,
+    0.35204750299453735,
+    0.013931604102253914,
+    0.5496764183044434,
+    0.004158753901720047,
+    0.2092914879322052,
+    0.09148923307657242,
+    -0.2677595019340515,
+    0.044244516640901566,
+    0.25607749819755554,
+    -0.3345734179019928,
+    -0.251026451587677,
+    -0.5243716239929199,
+    -0.07589935511350632,
+    0.2236466407775879,
+    0.692629337310791,
+    0.30974945425987244,
+    -0.05844153091311455,
+    0.03120841458439827,
+    0.06444196403026581,
+    -0.04019276052713394,
+    0.20844346284866333,
+    0.02490842342376709,
+    -0.4918057918548584,
+    0.17454466223716736,
+    0.008414536714553833,
+    0.3902796804904938,
+    -0.358223557472229,
+    0.03861651197075844,
+    0.18186673521995544,
+    -0.527253270149231,
+    -0.13051800429821014,
+    0.05706249922513962,
+    0.6442924737930298,
+    0.1802804172039032,
+    0.41333693265914917,
+    0.23335036635398865,
+    0.1602075695991516,
+    0.05822325870394707,
+    0.07076271623373032,
+    0.1175406351685524,
+    -0.11005706340074539
+  ],
+  "Peter_Male_EN_US": [
+    -0.02671806514263153,
+    -0.1403174251317978,
+    0.05001065880060196,
+    -0.1078786849975586,
+    -0.11178930848836899,
+    0.35923928022384644,
+    -0.10216598212718964,
+    -0.0744534507393837,
+    0.1513819694519043,
+    0.033666566014289856,
+    0.18034929037094116,
+    0.17279928922653198,
+    0.025053754448890686,
+    -0.041277479380369186,
+    -0.017297936603426933,
+    0.089497409760952,
+    0.09309914708137512,
+    -0.013885125517845154,
+    0.006094053387641907,
+    0.17924150824546814,
+    0.24564367532730103,
+    0.0279664546251297,
+    -0.0281650610268116,
+    -0.21447591483592987,
+    0.05961482226848602,
+    -0.24900272488594055,
+    0.08882076293230057,
+    -0.08165736496448517,
+    0.10724847763776779,
+    -0.12638989090919495,
+    -0.17386965453624725,
+    0.13405269384384155,
+    -0.34381765127182007,
+    -0.14751896262168884,
+    -0.22588366270065308,
+    -0.1478700041770935,
+    0.17554007470607758,
+    -0.06141895055770874,
+    -0.08225107192993164,
+    -0.11144131422042847,
+    -0.06510056555271149,
+    -0.28190499544143677,
+    0.038482390344142914,
+    -0.27125123143196106,
+    -0.18446698784828186,
+    -0.9286922812461853,
+    -0.22221821546554565,
+    0.1890208125114441,
+    -0.019690819084644318,
+    -0.0410115122795105,
+    0.020294375717639923,
+    0.027921512722969055,
+    -0.1279190480709076,
+    0.147661954164505,
+    -0.06851989030838013,
+    -0.3743244409561157,
+    0.16605032980442047,
+    -0.6898317337036133,
+    0.1322338581085205,
+    -0.24458415806293488,
+    0.8091621398925781,
+    -0.0020183511078357697,
+    -0.2860056161880493,
+    -0.101268470287323,
+    -0.12195206433534622,
+    -0.0030680038034915924,
+    0.35305753350257874,
+    0.18254509568214417,
+    0.15022733807563782,
+    0.05763908848166466,
+    0.05388329550623894,
+    -0.1870671808719635,
+    0.16868555545806885,
+    -0.005561813712120056,
+    -0.038571834564208984,
+    0.04676483944058418,
+    -0.30886393785476685,
+    0.167225182056427,
+    0.09490098804235458,
+    -0.1838131844997406,
+    -0.16072562336921692,
+    -0.07846863567829132,
+    -0.02979177236557007,
+    0.11231174319982529,
+    0.12191181629896164,
+    0.0011293292045593262,
+    0.00034183263778686523,
+    0.03643456846475601,
+    -0.004104208201169968,
+    0.0210350900888443,
+    -0.5258797407150269,
+    -0.28434744477272034,
+    -0.3846287727355957,
+    0.028454381972551346,
+    0.268708735704422,
+    -0.1812271922826767,
+    0.14686831831932068,
+    -0.19878965616226196,
+    0.014050750061869621,
+    -0.08926080912351608,
+    0.005555577576160431,
+    0.031308822333812714,
+    0.06296883523464203,
+    -0.030255869030952454,
+    0.014618240296840668,
+    0.15111848711967468,
+    0.08761651813983917,
+    0.017063427716493607,
+    -0.10042140632867813,
+    -0.5389280915260315,
+    0.01812286674976349,
+    -0.05892270803451538,
+    0.6147855520248413,
+    -0.2805640995502472,
+    0.04736167937517166,
+    0.08966498076915741,
+    -0.21231184899806976,
+    0.029807910323143005,
+    0.09918596595525742,
+    -0.030193090438842773,
+    -0.18508130311965942,
+    0.03303778916597366,
+    -0.04084295034408569,
+    -0.09405478835105896,
+    0.08350837975740433,
+    -0.2915036678314209,
+    -0.029322421178221703,
+    -0.2637880742549896,
+    -0.30284351110458374,
+    0.25083762407302856,
+    -0.05462878942489624,
+    0.437039852142334,
+    0.19965985417366028,
+    0.10523413866758347,
+    -0.10033008456230164,
+    0.08067487925291061,
+    0.4820234775543213,
+    0.572240948677063,
+    0.013404153287410736,
+    -0.18431192636489868,
+    0.049913421273231506,
+    0.14440348744392395,
+    0.010796692222356796,
+    -0.04425942152738571,
+    -0.034714244306087494,
+    0.09812092781066895,
+    0.22658464312553406,
+    0.5398628115653992,
+    0.14926102757453918,
+    -0.09294281154870987,
+    -0.29379820823669434,
+    0.2622401714324951,
+    -0.09194155037403107,
+    -0.007996812462806702,
+    0.16660672426223755,
+    -0.4457249045372009,
+    -0.1516806185245514,
+    -0.270463764667511,
+    -0.39716261625289917,
+    -0.1890673041343689,
+    -0.21207217872142792,
+    -0.24664843082427979,
+    -0.284817099571228,
+    -0.16662172973155975,
+    -0.1995084583759308,
+    0.07401363551616669,
+    0.006268583238124847,
+    -0.1385871022939682,
+    -0.24141649901866913,
+    -0.1358436644077301,
+    -0.268524706363678,
+    -0.13288292288780212,
+    -0.09328028559684753,
+    -0.037290073931217194,
+    -0.05488095059990883,
+    -0.03209332749247551,
+    -0.7609561085700989,
+    -0.2482100874185562,
+    0.23829908668994904,
+    -0.03605654835700989,
+    0.26974982023239136,
+    0.4465060234069824,
+    -0.1619127094745636,
+    -0.08157207816839218,
+    0.6551785469055176,
+    -0.6885366439819336,
+    0.26440972089767456,
+    -0.21962705254554749,
+    0.009923815727233887,
+    -0.02745674178004265,
+    -0.25602495670318604,
+    0.16939014196395874,
+    0.22960194945335388,
+    -0.113400399684906,
+    0.1810494065284729,
+    0.09377805143594742,
+    0.24526025354862213,
+    0.5957650542259216,
+    -0.1350148618221283,
+    -0.17307522892951965,
+    0.4434152841567993,
+    -0.06832988560199738,
+    -0.07215136289596558,
+    -0.18067269027233124,
+    -0.2654397487640381,
+    0.22328779101371765,
+    -0.1092233955860138,
+    0.001668304204940796,
+    0.05992841348052025,
+    -0.2866281270980835,
+    -0.0010518133640289307,
+    0.3629896640777588,
+    0.026638980954885483,
+    -0.3393930196762085,
+    0.162781223654747,
+    -0.21661463379859924,
+    -0.12741011381149292,
+    0.21406680345535278,
+    -0.16890040040016174,
+    0.014303475618362427,
+    0.1664152592420578,
+    -0.1924239546060562,
+    0.42883533239364624,
+    0.04888498783111572,
+    -0.5888325572013855,
+    -0.21445952355861664,
+    -0.2693358361721039,
+    -0.2160845249891281,
+    -0.18929903209209442,
+    0.29195863008499146,
+    0.08952829986810684,
+    -0.1636795699596405,
+    0.24474045634269714,
+    -0.28684142231941223,
+    -0.17337173223495483,
+    0.004411667585372925,
+    0.01754007488489151,
+    -0.5038971304893494,
+    0.2621816396713257,
+    -0.01969096064567566,
+    0.31702128052711487,
+    -0.40279054641723633,
+    -0.11601737886667252,
+    -0.014087185263633728,
+    -0.7800108194351196,
+    -0.11777613312005997,
+    -0.08311420679092407,
+    0.38773632049560547,
+    0.14981284737586975,
+    0.2543047368526459,
+    0.44576597213745117,
+    0.14574092626571655,
+    0.17641997337341309,
+    0.1667698472738266,
+    0.04656987264752388,
+    -0.36774906516075134
+  ],
+  "Jack_Male_EN_US": [
+    -0.04046084274887107,
+    -0.15095742102712392,
+    -0.006839682068675756,
+    -0.05994556490331888,
+    0.006684107612818471,
+    0.21780437231063843,
+    -0.05186830650782213,
+    0.045858974114526066,
+    0.18578437742544338,
+    0.23851692618336529,
+    0.25875567211769523,
+    0.01337982285767794,
+    0.0575837992830202,
+    0.010126538737677035,
+    -0.11069863769225777,
+    0.10501059270463883,
+    -0.038009885698556914,
+    0.09259203597903251,
+    0.006737061683088536,
+    0.033672554418444633,
+    0.12799083036952652,
+    -0.021536123100668186,
+    0.03342777863144876,
+    -0.17997418325394393,
+    0.017667141649872063,
+    -0.0656133412849158,
+    0.07788868229836225,
+    -0.20092849116772413,
+    -0.013813202735036612,
+    -0.15624882429838183,
+    -0.02039524572901428,
+    0.06695615525532048,
+    -0.03102828292176127,
+    -0.003229711949825284,
+    -0.16995424311608076,
+    0.021083407942205673,
+    0.0890236813283991,
+    -0.00943179002497345,
+    -0.016149783227592696,
+    0.09971937909722328,
+    -0.09221453487407416,
+    -0.15833347449079158,
+    -0.16861484068795107,
+    -0.19585764929652213,
+    0.019861079845577473,
+    -0.5355675680562854,
+    -0.03166075125336647,
+    0.27058335933834315,
+    -0.015265139937400807,
+    -0.014771698974072935,
+    0.0019185470417142012,
+    0.05587198091670871,
+    -0.09576068501919509,
+    -0.021184422494843605,
+    -0.17917617466300725,
+    -0.17127673390787096,
+    0.20629609785974026,
+    -0.18852811860851945,
+    0.012637676135636867,
+    -0.05553631568327547,
+    0.446656902320683,
+    -0.08865800648927688,
+    -0.0358334188349545,
+    -0.010281644179485738,
+    -0.07811169922351838,
+    0.109587676031515,
+    0.2809453630819917,
+    0.11856715977191926,
+    0.05583547845017166,
+    -0.12626958163455126,
+    0.1281449523754418,
+    -0.06413849201053382,
+    0.15792119931429624,
+    0.03743950969073922,
+    0.039248654276161685,
+    0.025773864006623626,
+    -0.11292729973793031,
+    0.20113790165632964,
+    -0.16639176942408085,
+    -0.08434787057340147,
+    -0.09793199738487603,
+    0.006178376311436293,
+    -0.013375372998416409,
+    0.0542846780270338,
+    0.10225461367517709,
+    0.039542005566181614,
+    0.039125220011919745,
+    -0.026181252760579806,
+    0.033635431178845474,
+    0.0862573640421033,
+    -0.16706872563809155,
+    -0.17828714922070504,
+    -0.12449762681499123,
+    -0.03108778726309538,
+    0.34800285045057533,
+    -0.09101906679570675,
+    0.10009890785440802,
+    -0.09514827439561487,
+    -0.034053249843418586,
+    -0.1105702156201005,
+    0.044860146183054894,
+    0.1358713038265705,
+    0.08085576379671693,
+    -0.08721686480566859,
+    0.0573709562420845,
+    0.11415926301851868,
+    0.245212434977293,
+    0.10061340439133346,
+    -0.14243060679873454,
+    -0.29007883500307796,
+    0.2037310192361474,
+    -0.07627206621691585,
+    0.39687543553300203,
+    -0.05620414759032429,
+    0.09882601262070238,
+    -0.0677183491177857,
+    -0.006265022698789827,
+    0.023330946313217284,
+    0.165302169136703,
+    0.02257582815364003,
+    -0.12311515075853095,
+    -0.1469769559800625,
+    0.012314948812127115,
+    -0.021668313816189756,
+    0.34593041818588977,
+    -0.04288801420480014,
+    -0.16514885276556016,
+    -0.18242249870672822,
+    -0.0026492349803447918,
+    -0.07602755706757307,
+    0.3050278574228287,
+    0.05592154124751687,
+    0.23462600968778133,
+    0.04983584135770798,
+    -0.278392353374511,
+    0.22432250184938313,
+    0.2686607390176505,
+    0.09875037595629692,
+    0.1183511849027127,
+    -0.054236005432903786,
+    0.08722816870140376,
+    0.03687728531658649,
+    0.02554770354181528,
+    0.12954192645847795,
+    -0.17979382164776325,
+    0.3348038989119232,
+    0.4423634188249707,
+    0.4926473870873451,
+    0.20223852992057798,
+    0.025749067217111565,
+    0.17501560486853124,
+    0.38072580406442286,
+    -0.2901147920638323,
+    0.20979762431234122,
+    0.2653069855645299,
+    -0.3772167260758579,
+    -0.09222150184214115,
+    -0.39089010236784816,
+    0.20441576987504956,
+    -0.23905933797359466,
+    -0.10717785591259599,
+    -0.16813391000032427,
+    -0.3918773714452982,
+    0.22002617083489895,
+    -0.07564694900065663,
+    0.07937551865907153,
+    0.08824989823624493,
+    0.29479082534089684,
+    -0.36032405607402324,
+    -0.2603407606482506,
+    -0.6042231546249242,
+    -0.28955514542758465,
+    -0.03436102028936147,
+    -0.04779914440587163,
+    -0.009762127837166193,
+    -0.10187441464513541,
+    -0.7215779416263104,
+    -0.27634545378386977,
+    0.17830019462853672,
+    -0.09875116832554341,
+    0.09090687595307827,
+    0.13431233698502182,
+    -0.2642242418602109,
+    -0.04540154328569772,
+    0.4095670249313116,
+    -0.35262783467769626,
+    0.16893002432771026,
+    0.08783781807869673,
+    0.25395081788301466,
+    -0.28145490009337665,
+    -0.326755971997045,
+    0.15969305289909244,
+    0.028953553736209857,
+    -0.21651662606745958,
+    0.4146030018106103,
+    -0.03423323482275009,
+    -0.16287488527595997,
+    0.39450788777321577,
+    0.12951190834864976,
+    0.1056388876902929,
+    0.08080296535044909,
+    0.3574946537613869,
+    0.019849372282624234,
+    0.040822872455464676,
+    -0.2921319276094437,
+    0.2146362524013966,
+    -0.134716684371233,
+    0.07487714374437929,
+    0.009296230599284167,
+    -0.04380686506628992,
+    -0.19373088590800763,
+    0.2639585494995117,
+    0.18104018196463584,
+    -0.16598513473290952,
+    0.04853666033595801,
+    -0.14947416950017212,
+    -0.21553540676832197,
+    0.08810192588716745,
+    -0.2630701248068362,
+    0.20122109837830066,
+    0.12282457035034895,
+    -0.37653126297518613,
+    0.23556544631719586,
+    0.10754718948155645,
+    -0.19623969851527362,
+    -0.37442944198846817,
+    -0.24549162713810802,
+    -0.12071249298751355,
+    0.07532338486053047,
+    0.4720609566196799,
+    0.33625265657901765,
+    0.09440774954855442,
+    0.08528476338833571,
+    -0.1694023549556732,
+    -0.07463834583759302,
+    0.11786841377615931,
+    -0.09039792915573344,
+    -0.5210130661725998,
+    0.24727064482867717,
+    -0.16189097724854945,
+    0.4837450442370027,
+    -0.1521998167037964,
+    -0.06750598764047028,
+    -0.004553849250078207,
+    -0.22957104928791522,
+    -0.017605035123415283,
+    0.2760094117373228,
+    0.05957211840432136,
+    -0.0009269976260838989,
+    0.1953226298559457,
+    0.223721924982965,
+    0.051529260072857144,
+    0.07620697033125907,
+    0.3487104419618845,
+    -0.29083244649809786,
+    -0.2311014744453132
+  ],
+  "Henry_Male_EN_US": [
+    0.03231465221033432,
+    -0.17197506912052632,
+    -0.15499479230493307,
+    -0.1250289712101221,
+    -0.16165112853050234,
+    0.11880796402692793,
+    -0.026341438165400174,
+    0.02843754307832569,
+    0.15634612458525227,
+    0.10595979747595265,
+    0.039114803401753315,
+    0.010465619154274462,
+    0.05449719361495225,
+    0.16031873143510894,
+    -0.1695658477488905,
+    -0.0018680405337363482,
+    -0.08171003330498933,
+    0.023001285642385474,
+    -0.1536627289839089,
+    0.011052230559289444,
+    0.11655736799002625,
+    -0.15112714925780893,
+    -0.010360681824386115,
+    -0.11909673623740674,
+    -0.21134809795767068,
+    0.20729044654872264,
+    0.19189890939742327,
+    0.034412209317088105,
+    0.07652324698865413,
+    -0.09826281983405351,
+    -0.03507392066530883,
+    0.05623610065958929,
+    0.023545358702540417,
+    0.1944381920620799,
+    -0.18579835649579762,
+    0.13530588764697313,
+    0.047002217611589,
+    -0.03857994754798711,
+    0.21279680896550413,
+    -0.0011355822905898053,
+    -0.11157526604365556,
+    -0.2419595753774047,
+    -0.25546876950538716,
+    0.03623581342399121,
+    -0.2026651309803128,
+    -0.5091725187376142,
+    0.11082670092582703,
+    0.1577077390626073,
+    -0.3597980797290802,
+    -0.011146663455292584,
+    0.08107478127349169,
+    0.22176175282802432,
+    -0.1103294763015583,
+    -0.15145504621323197,
+    -0.2445066723972559,
+    -0.1946099933935329,
+    0.256573729775846,
+    0.079415076575242,
+    0.07555773077765479,
+    -0.1760544968303293,
+    0.45968954982236027,
+    -0.2993650084361434,
+    0.23150971811264753,
+    -0.007668233546428378,
+    0.014109187014400976,
+    0.10277328002266586,
+    0.06715730596333742,
+    -0.1751516081392765,
+    0.13003269975306464,
+    -0.01215957077220084,
+    -0.10042227925732733,
+    0.13036054857075213,
+    0.008775722794234747,
+    0.07856735654640942,
+    -0.0694429306051461,
+    -0.04239498968236148,
+    -0.04804698210209607,
+    0.25152273084968324,
+    -0.1492430506274104,
+    0.16119943596422673,
+    0.33940611872822046,
+    0.08957686950452626,
+    -0.0576036872342229,
+    -0.08172749504446983,
+    -0.1345309724099934,
+    0.2364609685027972,
+    0.10072718104347586,
+    0.003340821829624467,
+    -0.04597767407540229,
+    0.018918244726955885,
+    -0.11239160280674693,
+    -0.07376309838145972,
+    0.12267176546156411,
+    -0.07140531631885097,
+    0.06295341402292251,
+    0.14427131954580547,
+    0.06250183843076229,
+    -0.1606520629953593,
+    -0.027804251573979866,
+    -0.14481351664289832,
+    0.11379512277198955,
+    0.4806660775095224,
+    -0.08994001736864446,
+    0.00915752639994026,
+    -0.11584404325112699,
+    0.14220867762342096,
+    0.2804017297923565,
+    0.18867827099747958,
+    -0.3128292956738733,
+    -0.06684039272367953,
+    0.21606469061225653,
+    -0.1878887706901878,
+    0.32509690122678875,
+    0.1665364772081375,
+    0.05110035295365378,
+    -0.24309139875695107,
+    -0.14905344853177668,
+    0.014429311221465464,
+    -0.04873416791670025,
+    0.14548272781539706,
+    -0.08988374508335253,
+    -0.21444802945479752,
+    0.058564639464020726,
+    0.2721280400641262,
+    0.205003977753222,
+    -0.010262779332697397,
+    -0.0029813522472977617,
+    -0.15529807284474373,
+    0.26777649037539963,
+    -0.0713763989508152,
+    0.3913366436958313,
+    0.24202184118330478,
+    0.07944705467671154,
+    0.4742859028279781,
+    -0.024598410213366118,
+    -0.18506417192984376,
+    0.4029238263145089,
+    0.08335387408733368,
+    0.10739199253730473,
+    -0.13465733248740436,
+    -0.01804239540069829,
+    -0.0791158242151141,
+    0.19488584250211716,
+    0.02578564528375863,
+    -0.5237501479685307,
+    -0.07989736758172511,
+    0.5030703557655215,
+    0.10675456821918486,
+    0.2127919055521488,
+    0.09851150363683697,
+    -0.08418610896915199,
+    0.1229889305308461,
+    -0.43523957654833795,
+    0.4120137566700578,
+    0.4287545826286077,
+    0.2022662471514195,
+    -0.20844841264188288,
+    -0.35773375257849693,
+    0.08517274558544158,
+    -0.1674375392496586,
+    -0.014676835667341941,
+    -0.05585243180394173,
+    -0.5275113929063082,
+    0.630928717367351,
+    -0.01564715448766945,
+    -0.23996227737661685,
+    0.45196260644588615,
+    0.5890609898604452,
+    -0.6399751238524913,
+    -0.15936621921136973,
+    -0.7311209061648697,
+    0.2599357020109892,
+    -0.31862640250474217,
+    -0.04317740127444269,
+    -0.11731456399429589,
+    -0.25942440861836075,
+    -0.2941827371716499,
+    0.23730804622173307,
+    0.46029331209138036,
+    0.38680253662168984,
+    -0.047635487094521534,
+    0.09478947315365074,
+    -0.3081191055476665,
+    -0.1648157351184636,
+    0.6156397035345436,
+    -0.13436328768730166,
+    0.2214298250619322,
+    -0.13365367855876686,
+    0.5899101013317705,
+    -0.43101135436445476,
+    0.1790693347575143,
+    -0.07457639621570705,
+    -0.20997890923172235,
+    -0.12710947534069417,
+    0.6243484194390476,
+    -0.7802158012986182,
+    -0.2108477063477039,
+    0.26811757814139126,
+    0.06200872911140322,
+    0.41312811655006954,
+    -0.03243043515831233,
+    0.2404710978269577,
+    -0.2426718469709158,
+    0.18551481867325492,
+    -0.1008815299719572,
+    0.30738673652522264,
+    0.04650051929056642,
+    0.04460244094952941,
+    0.23529892023652793,
+    -0.10654573403298855,
+    -0.24890044219791893,
+    -0.20255712829530237,
+    -0.02652014940977096,
+    -0.09871285315603015,
+    0.2915390910580754,
+    0.27769559375010433,
+    -0.14592748656868934,
+    0.29195716343820094,
+    -0.07187115289270878,
+    0.1771868597716093,
+    0.041152336634695516,
+    -0.1439833148382604,
+    0.30823934525251384,
+    0.3235661891289055,
+    0.1609754763310775,
+    -0.34351673722267145,
+    -0.022873798222281028,
+    0.09521509874612094,
+    0.32318700947798795,
+    0.8087762531824411,
+    0.3908264026977122,
+    0.002011305466294286,
+    0.043111137486994265,
+    0.07222179947420956,
+    -0.16160998903214935,
+    0.9778514429926872,
+    -0.1190133649390191,
+    -0.06515133678913118,
+    0.07287682355381547,
+    -0.5366707997396588,
+    0.3294163831975311,
+    0.4730239138007164,
+    0.14623114340938625,
+    -0.04914769362658263,
+    0.19938274882733825,
+    -0.042190209974069144,
+    0.5218729123473167,
+    -0.09630445644725116,
+    -0.05060101728595327,
+    0.003836261900141802,
+    0.23963055023923516,
+    0.34797419500537213,
+    -0.24325519371777776,
+    0.2133896093349904,
+    -0.5972239149094094,
+    -0.5162009912542999
+  ],
+  "Lisa_Female_EN_US": [
+    0.098259643453639,
+    -0.16734164860099554,
+    0.06323453821241856,
+    -0.11800012588500977,
+    -0.11747334823012351,
+    0.1664506748318672,
+    -0.11684786230325699,
+    0.05554657885804772,
+    0.18881248405668885,
+    0.02363725304603577,
+    0.13073167139664293,
+    0.099481688067317,
+    -0.05592308223713189,
+    0.06073833145201206,
+    -0.06404643282294273,
+    0.03512822799384594,
+    0.08353134356439113,
+    -0.041701164841651914,
+    0.027301983349025248,
+    0.20779836773872373,
+    0.18608229857636616,
+    -0.18563928958028555,
+    -0.07058929353952408,
+    -0.0963012244552374,
+    0.07196986377239227,
+    -0.36310549527406694,
+    0.02321777753531933,
+    -0.08520180359482765,
+    0.1750676281750202,
+    -0.1034254938364029,
+    -0.10941653922200203,
+    0.10243654051446356,
+    -0.047365300357341766,
+    -0.13165730237960815,
+    -0.16502732019871474,
+    0.041227119415998464,
+    0.13208873476833105,
+    -0.1005905382335186,
+    -0.134281662479043,
+    -0.03663246519863605,
+    0.019146875315345823,
+    -0.17250166907906533,
+    -0.10810867324471474,
+    -0.1385631315410137,
+    -0.07121777702122926,
+    -0.8006405025720597,
+    -0.1598912551999092,
+    0.1796777807176113,
+    0.1533122271299362,
+    -0.013358959695324302,
+    0.12085846066474915,
+    0.08968418501317502,
+    -0.1647926703095436,
+    -0.03534330911934376,
+    0.018276208639144892,
+    -0.31622386574745176,
+    0.044433788210153584,
+    -0.3835114762187004,
+    0.06946051493287086,
+    -0.14754583239555358,
+    0.7700884103775024,
+    0.05088452622294426,
+    -0.10631981901824475,
+    -0.03452881909906864,
+    -0.18428492173552513,
+    0.01940714865922928,
+    0.15734580717980862,
+    0.24781300947070123,
+    0.11008071005344391,
+    -0.037437029182910926,
+    -0.02237723711878061,
+    -0.05610082112252712,
+    0.17097073495388032,
+    -0.029630468040704724,
+    -0.14090186282992362,
+    0.05233948081731796,
+    -0.11303650513291358,
+    0.09975283332169056,
+    0.07712901234626768,
+    -0.10158926621079445,
+    -0.006173290871083734,
+    -0.05509051866829395,
+    -0.02421657182276249,
+    0.12725706659257413,
+    0.0459196088835597,
+    0.024673170596361163,
+    -0.021687139011919498,
+    -0.0033121073618531255,
+    0.16617082729935645,
+    0.1039574097841978,
+    -0.48374021649360655,
+    -0.27473467141389846,
+    -0.2696342796087265,
+    -0.0147636947222054,
+    0.07800779491662979,
+    -0.17835728526115419,
+    0.07246882431209087,
+    -0.10726579539477825,
+    -0.04813114702701569,
+    -0.05245459228754044,
+    0.021175800473429263,
+    -0.028440106660127633,
+    0.09296442177146673,
+    0.06005613040179014,
+    -0.024471254087984562,
+    0.034369273111224174,
+    0.21559504568576812,
+    -0.06217287853360176,
+    0.020450182259082794,
+    -0.42201632708311076,
+    0.18088365010917187,
+    0.06422147378325462,
+    0.42920178174972534,
+    -0.15756667256355283,
+    0.07944225370883942,
+    0.07854075198993087,
+    -0.08394828196614981,
+    -0.02590339761227369,
+    0.0945357296615839,
+    0.013894812762737276,
+    -0.0668190572410822,
+    0.03327381014823913,
+    0.017544799577444793,
+    0.03966145385056734,
+    0.185294571146369,
+    -0.30561336129903793,
+    0.05225303545594215,
+    -0.20599330589175224,
+    -0.0247444950044155,
+    0.0681127518415451,
+    0.04429299384355545,
+    0.1615323081612587,
+    -0.11333180218935013,
+    0.05444136634469032,
+    -0.32481844425201417,
+    0.18828704990446568,
+    0.2872520424425602,
+    0.13895429372787474,
+    -0.08756729066371917,
+    0.24525217991322276,
+    0.03871614711242728,
+    0.11254438832402229,
+    -0.06629508603364229,
+    -0.10699533671140671,
+    -0.049380650371313096,
+    0.06740754097700119,
+    0.16857104301452636,
+    0.30641851425170896,
+    -0.0305225610733032,
+    -0.4179070383310318,
+    -0.10720842555165291,
+    0.2560294335708022,
+    -0.019710254669189464,
+    -0.17763112932443617,
+    -0.0029114261269569397,
+    -0.31358570866286756,
+    0.041973935812711714,
+    -0.29000549390912056,
+    -0.35404677540063856,
+    -0.007181685417890549,
+    0.05955917574465275,
+    -0.1936878278851509,
+    -0.4633562132716179,
+    0.021091651916503917,
+    -0.19877577703446148,
+    -0.035990026939543895,
+    -0.13621442914009094,
+    0.14864262491464614,
+    -0.32244770638644693,
+    -0.20621291697025299,
+    -0.34765296103432775,
+    -0.24935359358787537,
+    -0.14733021520078182,
+    -0.1826939433813095,
+    -0.12082219868898392,
+    -0.2630250319838524,
+    -0.25408093333244325,
+    -0.23697019964456556,
+    0.3174678087234497,
+    -0.13980808593332766,
+    0.15056745186448098,
+    0.19160462617874144,
+    0.05726437717676161,
+    -0.010723254084587096,
+    0.3363094590604305,
+    -0.5907457679510116,
+    0.2439893047325313,
+    -0.08692961037158967,
+    0.06740072220563889,
+    0.024416530132293696,
+    -0.2389059288892895,
+    0.04350413922220468,
+    0.1647874414920807,
+    -0.22662141621112825,
+    -0.22065756656229496,
+    0.2145439937710762,
+    -0.17922353893518447,
+    0.12061219662427902,
+    0.03737899707630277,
+    -0.021762318909168236,
+    0.05909155458211899,
+    0.11406668499112128,
+    -0.33404846489429474,
+    -0.006388737261295324,
+    -0.30795534551143644,
+    0.12133514666929841,
+    0.004110211133956904,
+    0.13224451523274183,
+    0.3036839559674263,
+    -0.21054075136780737,
+    0.0030692771077156025,
+    0.0035036206245422363,
+    0.011927027255296707,
+    0.08805800816044211,
+    0.0661589827388525,
+    0.17130252979695795,
+    0.09395805150270461,
+    0.26391741409897806,
+    -0.05089263916015625,
+    -0.11061666905879974,
+    0.13363431245088578,
+    -0.30518253389745953,
+    -0.04395102113485336,
+    -0.03574146777391435,
+    -0.23613579357042908,
+    -0.16882284134626388,
+    -0.30275803729891776,
+    -0.04444010443985462,
+    0.2190699848346412,
+    0.27886907644569875,
+    0.20610505752265454,
+    0.03134636655449867,
+    -0.06447610408067703,
+    -0.0005596891045570457,
+    0.26649302095174787,
+    -0.2057398557662964,
+    0.0030920282006263733,
+    -0.4122629433870315,
+    0.3565848290920257,
+    0.20677504390478132,
+    0.3942677197046578,
+    -0.3252736687660217,
+    0.0967718569561839,
+    0.03678370863199233,
+    -0.3206644430756569,
+    -0.10442807674407958,
+    0.09797048419713975,
+    0.5563426822423935,
+    0.04038007035851478,
+    0.41352313160896303,
+    0.3068622753024101,
+    0.11289987117052078,
+    0.19880020171403884,
+    0.0596605807542801,
+    0.11166647523641585,
+    -0.09573411736637354
+  ],
+  "Anne_Female_EN_US": [
+    0.17940405994304454,
+    -0.1644238923676312,
+    0.09324024524539709,
+    -0.12510012816637756,
+    -0.06630658619105817,
+    0.11119609288871288,
+    -0.16596424281597139,
+    0.08907460342161358,
+    0.19401996767846869,
+    0.01849436295451596,
+    0.14532765592448413,
+    0.08469446934759615,
+    -0.023078771238215272,
+    0.07181636142777278,
+    -0.15694392705336213,
+    0.026453089481219653,
+    0.14835394602268934,
+    -0.050624337047338486,
+    0.08605103651061653,
+    0.25574559848755596,
+    0.18944694046513177,
+    -0.335114658344537,
+    -0.10531004574149846,
+    -0.13781959619373083,
+    0.06695765908807516,
+    -0.43373020405415447,
+    -0.00021649692207574567,
+    -0.06944741196930408,
+    0.18373148031532766,
+    -0.18261400833725927,
+    -0.07242659293115139,
+    0.07962212040729355,
+    -0.054722306877374643,
+    -0.08814518945291638,
+    -0.18172899140045046,
+    0.07253306582570077,
+    0.12834971025440609,
+    -0.07147813141345978,
+    -0.03303390946239235,
+    -0.020850191079080108,
+    0.03093239173758775,
+    -0.17062115278095008,
+    -0.2042961787432432,
+    0.0001160103827714809,
+    -0.05252801310271025,
+    -0.7858508661389351,
+    -0.2091302715241909,
+    0.215912102162838,
+    0.1682404987514019,
+    -0.004957896983250976,
+    0.08353579475078732,
+    0.07429065436590462,
+    -0.2838975650956854,
+    -0.060729915578849616,
+    0.015331120043992993,
+    -0.3538943402469158,
+    0.06603590287268161,
+    -0.3598479990148917,
+    0.04949475321918727,
+    -0.1802994295954704,
+    0.8948932841420174,
+    0.06269775629043578,
+    0.14551597367972133,
+    -0.024429614841938015,
+    -0.18672503978013993,
+    0.0038162305951118525,
+    0.06440221983939409,
+    0.22779810093343258,
+    0.0648292437195778,
+    -0.06520362980663777,
+    -0.1480596019886434,
+    -0.07716014515608549,
+    0.19149241223931313,
+    -0.00472725033760071,
+    -0.15999614455504343,
+    0.05745576778426767,
+    -0.1102717611938715,
+    0.10388381499797106,
+    0.060168941505253315,
+    -0.059192010387778285,
+    0.16045401743613183,
+    -0.08206549435853958,
+    -0.00931916926056147,
+    0.07603645194321872,
+    -0.015994349215179678,
+    0.06288723759353161,
+    0.035409542825073,
+    -0.02822249522432685,
+    0.1511568885296583,
+    0.025192760489881047,
+    -0.45415958352386954,
+    -0.2641012085601687,
+    -0.09631151705980302,
+    0.05240553788607939,
+    -0.038502784445881844,
+    -0.1828939441591501,
+    0.08149223010987043,
+    -0.16405740920454265,
+    -0.0005170568823814475,
+    -0.02928877165541053,
+    0.04517688824562356,
+    0.006311735883355152,
+    0.07976922886446118,
+    0.10996842151507735,
+    -0.08294843146577477,
+    -0.04048346038907766,
+    0.20001366436481477,
+    -0.17603044249117372,
+    0.030919674783945097,
+    -0.45755103826522825,
+    0.27328743394464255,
+    0.14530749581754207,
+    0.37551659494638445,
+    -0.08780852369964123,
+    0.0881434208364226,
+    0.1102366984821856,
+    -0.10324715869501233,
+    -0.0326073732227087,
+    0.11423155306838453,
+    0.08385749866720289,
+    -0.044914178038015964,
+    0.03559637144207953,
+    0.15771918892860412,
+    0.1590451302938163,
+    0.2501667616888881,
+    -0.4137455578893423,
+    0.0505435885861516,
+    -0.23191697373986245,
+    0.0287872213870287,
+    0.003149333596229556,
+    0.10368789061903953,
+    -0.010588538646697995,
+    -0.0879323348402977,
+    -0.07506629079580307,
+    -0.5269412443041801,
+    0.22431598380208015,
+    0.3359779428690672,
+    0.0032832570374011962,
+    -0.050378158874809745,
+    0.3839163174852729,
+    0.028349736475502138,
+    -0.10097187757492063,
+    -0.11896620839834213,
+    -0.1368182884529233,
+    0.04117071777582168,
+    0.09757037162780761,
+    0.30170601047575474,
+    0.42509459182620046,
+    -0.031749222427606555,
+    -0.450509675219655,
+    -0.10203840397298336,
+    0.11731623336672783,
+    -0.14807355552911758,
+    -0.3516409188508987,
+    -0.014435897022485725,
+    -0.2427325451746583,
+    0.06217130366712809,
+    -0.38942934162914755,
+    -0.23018259108066563,
+    -0.046737963333725915,
+    0.21478279903531072,
+    -0.15159097351133824,
+    -0.6199001990258693,
+    0.11925626099109649,
+    -0.11402976419776678,
+    0.007391047988494388,
+    -0.00794237181544305,
+    0.3376276850700378,
+    -0.469353917054832,
+    -0.39055002434179187,
+    -0.5357521926518529,
+    -0.2898576606065035,
+    -0.07800486553460359,
+    -0.3499526508152485,
+    -0.09789482404012233,
+    -0.29802559399977324,
+    -0.0005799770355224831,
+    -0.16338682733476162,
+    0.3528595224022865,
+    -0.43563686497509474,
+    0.09255755357444287,
+    0.14223229270428417,
+    0.0687978647649288,
+    9.937696158886233e-05,
+    0.370262223854661,
+    -0.6324451595544816,
+    0.2665458579082042,
+    -0.1399991037324071,
+    0.12931404411792755,
+    0.16657713875174524,
+    -0.12588859747629613,
+    0.11505986778065562,
+    0.04677194319665433,
+    -0.3566686304286122,
+    -0.24695453979074955,
+    0.2476239986717701,
+    -0.5334096863865851,
+    -0.07400115504860877,
+    0.12062631538137794,
+    0.04516074173152446,
+    -0.052435807511210436,
+    0.3395687915384769,
+    -0.35318856965750456,
+    0.09023026153445243,
+    -0.33012407943606376,
+    0.1426998670678586,
+    0.16262518018484115,
+    0.1251251042820513,
+    0.46874684616923334,
+    -0.15137760601937772,
+    0.068039158731699,
+    -0.30281599573791024,
+    -0.2254273697733879,
+    0.27405579378828404,
+    0.17136543877422808,
+    0.10630016443319619,
+    0.057858095318079,
+    0.11517375223338605,
+    -0.15762499999254942,
+    -0.29063242860138416,
+    0.20158795565366744,
+    -0.29120013369247316,
+    -0.24734700098633766,
+    -0.11135954931378364,
+    0.0172116317320615,
+    -0.31362133994698527,
+    -0.22704790353309365,
+    0.04125624597072601,
+    0.2366799856070429,
+    0.1992180491797626,
+    0.2684565844014287,
+    0.2055516693741083,
+    -0.33117692880332467,
+    -0.031874293368309735,
+    0.38305166363716125,
+    -0.29617225378751755,
+    -0.04308449327945709,
+    -0.5536482103168965,
+    0.5467873688321561,
+    0.17487224414944647,
+    0.4601214074995369,
+    -0.21070712432265282,
+    0.14032430904917417,
+    0.04221225241199136,
+    -0.1693667344748974,
+    -0.05280606932938099,
+    0.19504989907145498,
+    0.5787762992084027,
+    -0.0508410669863224,
+    0.3892222262918949,
+    0.3818872168660164,
+    0.1461108922958374,
+    0.2555951376445591,
+    -0.23311877846717832,
+    0.03256144672632219,
+    0.08528051348403096
+  ]
+}

voices_.json.example ADDED Viewed

	@@ -0,0 +1,2840 @@

+{
+  "Richard_Male_EN_US": [
+    0.0838528722524643,
+    -0.20531457662582397,
+    -0.10166072100400925,
+    -0.08704791963100433,
+    -0.16887575387954712,
+    0.07563386857509613,
+    -0.11252004653215408,
+    0.0405205562710762,
+    0.17604444921016693,
+    -0.0022975243628025055,
+    0.07261361181735992,
+    0.01948651298880577,
+    0.03549861162900925,
+    0.10135026276111603,
+    -0.028263121843338013,
+    0.002969518303871155,
+    0.0983193963766098,
+    -0.0498846136033535,
+    -0.059726495295763016,
+    0.14428836107254028,
+    0.22363322973251343,
+    -0.18453018367290497,
+    -0.02805022895336151,
+    -0.04543690383434296,
+    -0.11356891691684723,
+    -0.13455606997013092,
+    0.09829540550708771,
+    -0.08688592910766602,
+    0.07672901451587677,
+    -0.18105418980121613,
+    -0.18634817004203796,
+    0.12421728670597076,
+    -0.007412530481815338,
+    -0.05725667625665665,
+    -0.1923050880432129,
+    0.12631717324256897,
+    0.19541586935520172,
+    -0.07617154717445374,
+    -0.08994933217763901,
+    -0.046437621116638184,
+    -0.043646734207868576,
+    -0.14466425776481628,
+    -0.10671895742416382,
+    -0.013705611228942871,
+    -0.207138791680336,
+    -0.752566933631897,
+    -0.1562556028366089,
+    0.15995296835899353,
+    -0.17764419317245483,
+    0.016572676599025726,
+    0.10811036825180054,
+    0.08399468660354614,
+    -0.08008511364459991,
+    -0.03857298195362091,
+    -0.0649266391992569,
+    -0.22515754401683807,
+    0.11427924036979675,
+    -0.24663931131362915,
+    0.1547779142856598,
+    -0.14620377123355865,
+    0.5633630752563477,
+    -0.1662401258945465,
+    -0.09044578671455383,
+    0.004183262586593628,
+    -0.0022140033543109894,
+    -0.04644201323390007,
+    0.09777267277240753,
+    0.007478602230548859,
+    0.09729039669036865,
+    0.014121796935796738,
+    -0.1283886432647705,
+    0.016886277124285698,
+    0.14353325963020325,
+    -0.019632495939731598,
+    -0.08626653999090195,
+    -0.13174456357955933,
+    -0.07509270310401917,
+    0.13293522596359253,
+    -0.006963282823562622,
+    0.04489506036043167,
+    0.1364283263683319,
+    -0.023780206218361855,
+    -0.13672304153442383,
+    0.0834212377667427,
+    -0.0881689265370369,
+    0.12765783071517944,
+    -0.04777761548757553,
+    0.006771944463253021,
+    0.10008896142244339,
+    -0.0004957094788551331,
+    -0.3792557418346405,
+    -0.2995077669620514,
+    -0.18048475682735443,
+    -0.05329251289367676,
+    0.15887054800987244,
+    -0.05502178147435188,
+    -0.002659738063812256,
+    -0.1500413417816162,
+    0.02029288560152054,
+    -0.13041751086711884,
+    -0.02238699235022068,
+    0.13223209977149963,
+    0.008205652236938477,
+    0.09628777951002121,
+    -0.005202248692512512,
+    0.06322506815195084,
+    0.2893131375312805,
+    0.13000035285949707,
+    -0.12980809807777405,
+    -0.19357866048812866,
+    0.010631434619426727,
+    -0.09848842024803162,
+    0.43145453929901123,
+    -0.07535015791654587,
+    0.030972477048635483,
+    -0.005491979420185089,
+    -0.23411263525485992,
+    0.0034233778715133667,
+    -0.03615764528512955,
+    0.11543036997318268,
+    -0.04371267557144165,
+    -0.11525161564350128,
+    0.023816093802452087,
+    0.028308294713497162,
+    -0.0039406418800354,
+    -0.13210836052894592,
+    0.06155262142419815,
+    -0.17223545908927917,
+    -0.07812541723251343,
+    -0.04739491268992424,
+    0.012321650981903076,
+    0.2732156217098236,
+    -0.006376683712005615,
+    0.2102227807044983,
+    -0.026796162128448486,
+    0.023160047829151154,
+    0.4432758688926697,
+    0.05461269989609718,
+    -0.05827128142118454,
+    -0.23424984514713287,
+    0.004663914442062378,
+    0.22082097828388214,
+    0.28713545203208923,
+    -0.08923299610614777,
+    -0.11788474768400192,
+    -0.14981813728809357,
+    0.008348524570465088,
+    0.15658962726593018,
+    0.2602955102920532,
+    -0.32679831981658936,
+    -0.38099929690361023,
+    0.22179332375526428,
+    -0.18678224086761475,
+    0.1026342585682869,
+    0.03508329764008522,
+    -0.0821741446852684,
+    -0.08279386162757874,
+    -0.2976434528827667,
+    -0.4842967391014099,
+    -0.22528287768363953,
+    0.05699944496154785,
+    -0.16488179564476013,
+    -0.15537670254707336,
+    0.17949746549129486,
+    -0.0883849710226059,
+    -0.07729420065879822,
+    -0.011546742171049118,
+    0.055838439613580704,
+    -0.2754920423030853,
+    0.24266156554222107,
+    -0.25005173683166504,
+    0.15594978630542755,
+    -0.2281457483768463,
+    -0.022545501589775085,
+    -0.18430665135383606,
+    -0.09129363298416138,
+    -0.46234992146492004,
+    0.18292169272899628,
+    0.3553994596004486,
+    0.18489143252372742,
+    0.16369718313217163,
+    0.06524109095335007,
+    -0.13083520531654358,
+    -0.004223830997943878,
+    0.5248246192932129,
+    -0.4541511535644531,
+    0.33416515588760376,
+    -0.23953655362129211,
+    0.039670318365097046,
+    -0.2775384485721588,
+    -0.06698162853717804,
+    -0.04875987395644188,
+    0.095211923122406,
+    -0.2521704435348511,
+    -0.1613994985818863,
+    -0.11273781210184097,
+    0.089663565158844,
+    0.4533567726612091,
+    -0.12889298796653748,
+    0.04630730301141739,
+    0.3440477252006531,
+    -0.29735368490219116,
+    -0.16942940652370453,
+    -0.30063536763191223,
+    -0.04887039214372635,
+    0.07650876045227051,
+    -0.24708417057991028,
+    0.11167217791080475,
+    0.41300496459007263,
+    -0.1990889608860016,
+    -0.03628714382648468,
+    0.1947430670261383,
+    0.002185918390750885,
+    -0.1734754890203476,
+    0.007325600832700729,
+    0.0758146345615387,
+    0.14343833923339844,
+    0.16034956276416779,
+    0.09238007664680481,
+    -0.14523102343082428,
+    0.07370740175247192,
+    -0.05756370723247528,
+    0.3471793532371521,
+    -0.09545806050300598,
+    -0.32518574595451355,
+    0.01974405348300934,
+    -0.07413671910762787,
+    -0.07332949340343475,
+    0.09689724445343018,
+    0.33527132868766785,
+    0.07647977769374847,
+    -0.07537016272544861,
+    0.19729986786842346,
+    -0.054191842675209045,
+    0.019159607589244843,
+    0.28645795583724976,
+    -0.0029614195227622986,
+    0.050225719809532166,
+    0.058712102472782135,
+    -0.020884737372398376,
+    0.32063227891921997,
+    0.13905727863311768,
+    -0.08830951899290085,
+    -0.2068493664264679,
+    -0.5085070133209229,
+    -0.19245725870132446,
+    0.07536047697067261,
+    0.3520141541957855,
+    -0.004317941144108772,
+    0.11286243796348572,
+    0.4874182343482971,
+    0.197415292263031,
+    -0.11710592359304428,
+    0.41149505972862244,
+    -0.024338984861969948,
+    -0.3128387928009033
+  ],
+  "Chuck_Male_EN_US": [
+    -0.080739326775074,
+    -0.05186597257852554,
+    -0.04643955081701279,
+    -0.16995930671691895,
+    -0.08520634472370148,
+    0.26354464888572693,
+    -0.16335150599479675,
+    0.03762347623705864,
+    0.12310560047626495,
+    0.11882100999355316,
+    0.25169509649276733,
+    0.16642478108406067,
+    0.04346868768334389,
+    0.05879981815814972,
+    -0.10555227845907211,
+    0.098129041492939,
+    0.07805588096380234,
+    0.07993364334106445,
+    0.02878241240978241,
+    0.10626713931560516,
+    0.18884220719337463,
+    -0.15791675448417664,
+    -0.046336084604263306,
+    -0.06054564565420151,
+    0.14667274057865143,
+    -0.1686663031578064,
+    0.13676004111766815,
+    0.017875753343105316,
+    0.034988172352313995,
+    -0.15061573684215546,
+    -0.06622567772865295,
+    -0.018525442108511925,
+    -0.0815289169549942,
+    -0.11815841495990753,
+    -0.2079353630542755,
+    -0.12242597341537476,
+    0.1685279756784439,
+    0.005029462277889252,
+    -0.04868243634700775,
+    -0.006423652172088623,
+    -0.03062119334936142,
+    -0.10892745107412338,
+    -0.029393166303634644,
+    -0.14687927067279816,
+    -0.10593554377555847,
+    -0.7461926937103271,
+    -0.1311473250389099,
+    0.19617816805839539,
+    0.0034797536209225655,
+    0.017870396375656128,
+    0.1417236328125,
+    0.14160263538360596,
+    -0.13305433094501495,
+    -0.019239917397499084,
+    0.0768553614616394,
+    -0.2461501657962799,
+    0.11662117391824722,
+    -0.3004859685897827,
+    0.07660801708698273,
+    -0.08291581273078918,
+    0.646587610244751,
+    0.05630920082330704,
+    0.018290594220161438,
+    -0.076407790184021,
+    -0.022635802626609802,
+    -0.033361952751874924,
+    0.13813386857509613,
+    0.23338472843170166,
+    0.1526336669921875,
+    -0.010544595308601856,
+    0.0645538792014122,
+    -0.12352045625448227,
+    0.23500092327594757,
+    0.023993253707885742,
+    -0.18476025760173798,
+    0.08681316673755646,
+    -0.24495398998260498,
+    0.1805841475725174,
+    0.019407637417316437,
+    -0.1164683923125267,
+    0.04219061881303787,
+    -0.05617094039916992,
+    -0.06486696004867554,
+    0.11154982447624207,
+    0.1624276340007782,
+    0.037209782749414444,
+    0.024621259421110153,
+    -0.011263281106948853,
+    0.07437050342559814,
+    -0.027852090075612068,
+    -0.38197386264801025,
+    -0.12534263730049133,
+    -0.15090727806091309,
+    -0.016294121742248535,
+    0.20195062458515167,
+    -0.10578630119562149,
+    0.0834207683801651,
+    -0.2002831995487213,
+    0.08801543712615967,
+    -0.1744777411222458,
+    -0.019703160971403122,
+    0.18256394565105438,
+    0.019218653440475464,
+    -0.06123323366045952,
+    0.03127707168459892,
+    0.1301468461751938,
+    0.2009834200143814,
+    0.07228143513202667,
+    -0.05553338676691055,
+    -0.44165605306625366,
+    0.27527377009391785,
+    -0.08356539905071259,
+    0.5484572649002075,
+    -0.22545355558395386,
+    0.10192038118839264,
+    0.13001194596290588,
+    -0.1598789542913437,
+    0.03745634853839874,
+    0.1973172128200531,
+    0.10266508162021637,
+    -0.10606272518634796,
+    -0.043102242052555084,
+    0.023647010326385498,
+    0.0016689617186784744,
+    0.35201117396354675,
+    -0.24116483330726624,
+    0.022700302302837372,
+    -0.29655206203460693,
+    -0.11883702874183655,
+    0.06606853753328323,
+    -0.09965553879737854,
+    0.3298850655555725,
+    -0.10845916718244553,
+    0.12123875319957733,
+    -0.2904745042324066,
+    0.16466909646987915,
+    0.5250499844551086,
+    0.26844123005867004,
+    -0.1581430584192276,
+    -0.01679089665412903,
+    0.10683909058570862,
+    0.13829728960990906,
+    -0.0774451196193695,
+    -0.09715490788221359,
+    -0.15488898754119873,
+    0.09843119978904724,
+    0.18665491044521332,
+    0.499192476272583,
+    0.25495558977127075,
+    -0.3385838270187378,
+    -0.11540469527244568,
+    0.3943556845188141,
+    -0.028287045657634735,
+    0.10370328277349472,
+    0.3033093214035034,
+    -0.13608571887016296,
+    -0.07120000571012497,
+    -0.14757874608039856,
+    -0.2092522382736206,
+    -0.349817156791687,
+    -0.042082756757736206,
+    -0.28092268109321594,
+    -0.3474852740764618,
+    -0.1188138797879219,
+    -0.3396819233894348,
+    0.16733011603355408,
+    0.09606117755174637,
+    0.10766048729419708,
+    -0.19903156161308289,
+    -0.30723923444747925,
+    -0.3617871105670929,
+    -0.24389447271823883,
+    -0.33159559965133667,
+    -0.0794874057173729,
+    -0.17680421471595764,
+    -0.09732476621866226,
+    -0.6020764708518982,
+    -0.23046669363975525,
+    0.4663076400756836,
+    -0.03177022933959961,
+    0.31536608934402466,
+    0.17449232935905457,
+    0.19207462668418884,
+    -0.20476028323173523,
+    0.3841668367385864,
+    -0.46485692262649536,
+    0.07715408504009247,
+    -0.0481671467423439,
+    0.06056290864944458,
+    0.23023973405361176,
+    -0.44577276706695557,
+    0.08608794212341309,
+    0.174083411693573,
+    -0.20057682693004608,
+    0.0015126615762710571,
+    0.30561572313308716,
+    -0.0823325663805008,
+    0.6210863590240479,
+    0.12966740131378174,
+    -0.27340665459632874,
+    0.31147849559783936,
+    0.17376896739006042,
+    -0.28636378049850464,
+    0.1761811077594757,
+    -0.3044331669807434,
+    0.0771920382976532,
+    -0.04131172597408295,
+    0.17433065176010132,
+    0.11375144124031067,
+    -0.06075088679790497,
+    -0.21101467311382294,
+    0.23215331137180328,
+    0.3220982849597931,
+    -0.08772740513086319,
+    -0.1110156923532486,
+    0.150890052318573,
+    0.0899096205830574,
+    -0.11286906898021698,
+    -0.18477720022201538,
+    0.12096066772937775,
+    0.33322685956954956,
+    -0.2950510084629059,
+    0.2563823163509369,
+    0.11760752648115158,
+    -0.3458101749420166,
+    -0.4250616133213043,
+    -0.25533783435821533,
+    -0.2633964717388153,
+    -0.026663780212402344,
+    0.4655682444572449,
+    0.3740382790565491,
+    -0.05553853511810303,
+    0.024137284606695175,
+    -0.044697582721710205,
+    0.03481140360236168,
+    0.01849237084388733,
+    -0.15648233890533447,
+    -0.719332218170166,
+    0.5206979513168335,
+    -0.022456303238868713,
+    0.6854866743087769,
+    -0.32744836807250977,
+    -0.08906684815883636,
+    -0.02081950753927231,
+    -0.8612825870513916,
+    -0.1892240047454834,
+    0.07316698133945465,
+    0.4908924400806427,
+    0.30862411856651306,
+    0.3830990791320801,
+    0.38602370023727417,
+    0.25254443287849426,
+    0.26230084896087646,
+    0.12000225484371185,
+    0.0641913115978241,
+    -0.5113836526870728
+  ],
+  "Sol_Female_EN_US": [
+    0.1268293261528015,
+    -0.24892280995845795,
+    0.03928159922361374,
+    -0.08916330337524414,
+    -0.08921554684638977,
+    0.018120769411325455,
+    0.009445525705814362,
+    0.09456969052553177,
+    0.23499509692192078,
+    0.12589207291603088,
+    0.0817081481218338,
+    -0.05610091611742973,
+    -0.11433179676532745,
+    0.031890347599983215,
+    0.01497705653309822,
+    0.10599376261234283,
+    0.03902814909815788,
+    0.01317581906914711,
+    0.008249595761299133,
+    0.010834900662302971,
+    0.1323947310447693,
+    -0.14897435903549194,
+    -0.044409990310668945,
+    -0.004388481378555298,
+    -0.02122711017727852,
+    -0.21078309416770935,
+    0.05238814279437065,
+    -0.24263539910316467,
+    0.10478609800338745,
+    -0.046628206968307495,
+    -0.061156079173088074,
+    0.04726453870534897,
+    0.19356507062911987,
+    -0.10425321012735367,
+    -0.1245705783367157,
+    0.2371465265750885,
+    0.15406547486782074,
+    -0.11537078022956848,
+    -0.2574460506439209,
+    0.11589224636554718,
+    0.04982087016105652,
+    -0.0768856406211853,
+    -0.11789155006408691,
+    -0.13019400835037231,
+    0.03559808060526848,
+    -0.47099581360816956,
+    0.06938941776752472,
+    0.19138163328170776,
+    0.17706745862960815,
+    0.035381563007831573,
+    0.09636449813842773,
+    0.07912801951169968,
+    0.06765618175268173,
+    -0.1303500384092331,
+    -0.039963360875844955,
+    -0.04088369756937027,
+    0.02034657448530197,
+    0.008391611278057098,
+    0.02184874564409256,
+    -0.03052680939435959,
+    0.3498419225215912,
+    -0.07705945521593094,
+    -0.2935195565223694,
+    0.034476667642593384,
+    -0.1314329355955124,
+    0.20076632499694824,
+    0.016021449118852615,
+    0.23033341765403748,
+    -0.03349122032523155,
+    -0.18335162103176117,
+    0.029580311849713326,
+    0.018869629129767418,
+    0.10253989696502686,
+    -0.09266053140163422,
+    -0.03108178824186325,
+    -0.03976592794060707,
+    0.13201536238193512,
+    -0.028312936425209045,
+    -0.09032510221004486,
+    0.05712374672293663,
+    -0.17886731028556824,
+    -0.00012268498539924622,
+    -0.17655304074287415,
+    0.21560686826705933,
+    0.07977418601512909,
+    0.09157729148864746,
+    -0.08235643059015274,
+    -0.034677520394325256,
+    0.2231934666633606,
+    0.1851099133491516,
+    -0.2730552554130554,
+    -0.2409580945968628,
+    -0.273377925157547,
+    -0.11498671770095825,
+    0.29265373945236206,
+    -0.10599346458911896,
+    -0.05672678351402283,
+    0.026578396558761597,
+    -0.22945210337638855,
+    -0.08645745366811752,
+    0.028000690042972565,
+    -0.13934218883514404,
+    0.11353091895580292,
+    0.060757409781217575,
+    0.11343018710613251,
+    0.053218141198158264,
+    0.3181232810020447,
+    0.10948897153139114,
+    0.0357043594121933,
+    -0.1203552708029747,
+    0.11475016921758652,
+    -0.005062885582447052,
+    0.3342074751853943,
+    -0.1266603022813797,
+    0.07479999959468842,
+    -0.008454116061329842,
+    0.12023192644119263,
+    -0.03595118224620819,
+    0.02898475155234337,
+    -0.020386993885040283,
+    0.006668185815215111,
+    -0.15364103019237518,
+    -0.11951534450054169,
+    -0.0910012498497963,
+    0.19956853985786438,
+    0.04014497250318527,
+    -0.09457655251026154,
+    -0.12396776676177979,
+    0.23229674994945526,
+    -0.15745335817337036,
+    0.17193259298801422,
+    0.040711648762226105,
+    -0.12352880835533142,
+    0.018167633563280106,
+    -0.08081409335136414,
+    0.23432570695877075,
+    0.17171189188957214,
+    -0.03221336752176285,
+    0.03773265331983566,
+    -0.06490489095449448,
+    -0.030414387583732605,
+    0.4086611866950989,
+    0.07678371667861938,
+    0.15471185743808746,
+    0.009691998362541199,
+    0.21592354774475098,
+    0.16220787167549133,
+    0.13170170783996582,
+    0.11527039110660553,
+    -0.3844143748283386,
+    0.0421525314450264,
+    0.4349702298641205,
+    -0.1686660647392273,
+    0.005835492163896561,
+    -0.05163434147834778,
+    -0.38664859533309937,
+    0.09356559813022614,
+    -0.2766155004501343,
+    -0.13494873046875,
+    -0.07143319398164749,
+    -0.0797828882932663,
+    -0.10624134540557861,
+    -0.05675575137138367,
+    0.2754574418067932,
+    0.11232379078865051,
+    -0.026216700673103333,
+    -0.37042930722236633,
+    0.04595255106687546,
+    -0.08378663659095764,
+    0.113258957862854,
+    -0.10497808456420898,
+    -0.3882599174976349,
+    -0.09268787503242493,
+    -0.009513184428215027,
+    -0.03547880798578262,
+    -0.11325360834598541,
+    -0.4920811951160431,
+    -0.2420617938041687,
+    0.004631944000720978,
+    0.3054035007953644,
+    0.12272718548774719,
+    -0.1861076056957245,
+    -0.1328718364238739,
+    0.22628089785575867,
+    0.1674436330795288,
+    -0.2189907729625702,
+    0.25414198637008667,
+    0.08179888129234314,
+    0.014794200658798218,
+    -0.45081019401550293,
+    -0.4995046854019165,
+    -0.0721922218799591,
+    0.20731398463249207,
+    -0.07364560663700104,
+    -0.17112991213798523,
+    0.20308616757392883,
+    0.0781199038028717,
+    0.20510229468345642,
+    -0.18790192902088165,
+    0.08215056359767914,
+    0.05191810801625252,
+    -0.15418048202991486,
+    -0.1164349764585495,
+    -0.30107319355010986,
+    -0.07877662777900696,
+    0.006951943039894104,
+    -0.2136976420879364,
+    0.18753382563591003,
+    0.1558315008878708,
+    0.03319445252418518,
+    -0.20069114863872528,
+    0.5186187028884888,
+    0.29910457134246826,
+    -0.022099845111370087,
+    -0.2004503756761551,
+    0.11575216799974442,
+    0.06575708091259003,
+    0.29491111636161804,
+    0.042733918875455856,
+    0.13065889477729797,
+    -0.025842148810625076,
+    -0.48179322481155396,
+    0.12712322175502777,
+    -0.22928954660892487,
+    -0.4731486141681671,
+    0.2035326510667801,
+    -0.33841538429260254,
+    -0.09808406233787537,
+    0.30838146805763245,
+    0.06581465899944305,
+    0.047930970788002014,
+    0.01692097634077072,
+    0.22469750046730042,
+    -0.05486059561371803,
+    0.35013893246650696,
+    -0.283150851726532,
+    0.05401553213596344,
+    -0.04293721914291382,
+    0.03238523006439209,
+    0.30903106927871704,
+    0.318570613861084,
+    -0.36268168687820435,
+    0.01699633151292801,
+    -0.122194804251194,
+    -0.08210300654172897,
+    -0.08749544620513916,
+    0.04085458070039749,
+    0.26824674010276794,
+    -0.20407041907310486,
+    0.3028109669685364,
+    0.11649337410926819,
+    -0.06361576169729233,
+    0.022716812789440155,
+    0.8145036101341248,
+    -0.001978829503059387,
+    -0.19634583592414856
+  ],
+  "Georgia_Female_EN_US": [
+    0.14149390161037445,
+    -0.19759099185466766,
+    0.029538815841078758,
+    -0.1644008457660675,
+    -0.16974563896656036,
+    0.15899056196212769,
+    -0.08187974989414215,
+    0.06346520036458969,
+    0.171818345785141,
+    -0.03900427371263504,
+    0.08924897015094757,
+    0.11517727375030518,
+    -0.09470553696155548,
+    0.039182037115097046,
+    -0.0800875872373581,
+    0.027626454830169678,
+    0.057931605726480484,
+    -0.05594071373343468,
+    -0.01764649897813797,
+    0.1859845221042633,
+    0.19512777030467987,
+    -0.2715531587600708,
+    -0.15435153245925903,
+    -0.07994608581066132,
+    0.0034161433577537537,
+    -0.27405399084091187,
+    0.06616479158401489,
+    0.028649676591157913,
+    0.24419546127319336,
+    -0.053172968327999115,
+    -0.06803376972675323,
+    0.08285264670848846,
+    -0.03827327489852905,
+    -0.05404618754982948,
+    -0.1717120110988617,
+    0.0565122552216053,
+    0.12560471892356873,
+    -0.07519722729921341,
+    -0.005836378782987595,
+    -0.049631841480731964,
+    0.035924024879932404,
+    -0.20555508136749268,
+    -0.16342787444591522,
+    -0.011107422411441803,
+    -0.09510314464569092,
+    -0.8373715877532959,
+    -0.056464750319719315,
+    0.15504246950149536,
+    0.12261460721492767,
+    -0.002536684274673462,
+    0.14500755071640015,
+    0.17729829251766205,
+    -0.16478273272514343,
+    -0.07822693139314651,
+    0.03328864276409149,
+    -0.3484482765197754,
+    0.07604808360338211,
+    -0.22294224798679352,
+    0.06523670256137848,
+    -0.22709456086158752,
+    0.8876799941062927,
+    0.0027947500348091125,
+    0.0007318109273910522,
+    0.002863973379135132,
+    -0.21034874022006989,
+    0.051948100328445435,
+    -0.004550091922283173,
+    0.17473770678043365,
+    0.1153031662106514,
+    -0.09051527082920074,
+    -0.07489325851202011,
+    0.03644700348377228,
+    0.1395515352487564,
+    -0.010498672723770142,
+    -0.16194367408752441,
+    0.11820540577173233,
+    -0.1125202625989914,
+    0.07222796976566315,
+    0.0924602597951889,
+    0.009883157908916473,
+    0.14233753085136414,
+    -0.04211493209004402,
+    -0.09790381044149399,
+    0.1432836949825287,
+    -0.0207438375800848,
+    0.09433138370513916,
+    0.03480076417326927,
+    0.014073198661208153,
+    0.1459684669971466,
+    0.06838452816009521,
+    -0.4587509036064148,
+    -0.24484041333198547,
+    -0.13059453666210175,
+    -0.014020655304193497,
+    -0.04615045711398125,
+    -0.10020460933446884,
+    0.05875978618860245,
+    -0.11167144775390625,
+    -0.08788008987903595,
+    -0.06586126983165741,
+    0.0656682550907135,
+    0.06709162890911102,
+    0.02795044332742691,
+    0.11588016897439957,
+    -0.09147179126739502,
+    0.08282454311847687,
+    0.19108889997005463,
+    -0.09372390806674957,
+    -0.0004408508539199829,
+    -0.40825721621513367,
+    0.24378983676433563,
+    0.06450286507606506,
+    0.40147995948791504,
+    -0.12383461743593216,
+    0.09264419227838516,
+    0.04705287888646126,
+    -0.0979108139872551,
+    -0.04610448330640793,
+    0.06577446311712265,
+    0.06107745319604874,
+    -0.0739186629652977,
+    0.03969721123576164,
+    0.0321660116314888,
+    0.2023421972990036,
+    0.22365602850914001,
+    -0.33337128162384033,
+    0.10086256265640259,
+    -0.23017814755439758,
+    0.15227298438549042,
+    0.08262811601161957,
+    0.028533905744552612,
+    0.16887661814689636,
+    -0.1553392857313156,
+    0.04320569336414337,
+    -0.18707242608070374,
+    0.021115079522132874,
+    0.3647507131099701,
+    0.2119525820016861,
+    -0.02559354156255722,
+    0.268862247467041,
+    -0.03270912170410156,
+    0.01871364563703537,
+    -0.0919923335313797,
+    -0.13874371349811554,
+    -0.092261902987957,
+    0.0468045249581337,
+    0.29371997714042664,
+    0.21063821017742157,
+    -0.12585729360580444,
+    -0.4214266538619995,
+    -0.17777106165885925,
+    0.14042110741138458,
+    -0.1407075822353363,
+    -0.1934659481048584,
+    0.015365049242973328,
+    -0.12806877493858337,
+    -0.01690494269132614,
+    -0.2808881402015686,
+    -0.32276445627212524,
+    -0.04267498850822449,
+    0.04772596061229706,
+    -0.13011249899864197,
+    -0.4758068323135376,
+    0.21355567872524261,
+    -0.12164445221424103,
+    -0.10112264752388,
+    -0.0498490147292614,
+    0.2474687546491623,
+    -0.40088728070259094,
+    -0.21887987852096558,
+    -0.4579368829727173,
+    -0.21036852896213531,
+    -0.18377023935317993,
+    -0.23978865146636963,
+    -0.15847837924957275,
+    -0.36417555809020996,
+    -0.1878042072057724,
+    -0.12206757068634033,
+    0.4226543605327606,
+    -0.00703008845448494,
+    0.17988801002502441,
+    0.235824853181839,
+    0.0072716958820819855,
+    -0.022622771561145782,
+    0.4673866033554077,
+    -0.4320169687271118,
+    0.27932173013687134,
+    -0.1372895985841751,
+    0.13946086168289185,
+    -0.011557169258594513,
+    -0.11092820018529892,
+    -0.0025858357548713684,
+    0.06566678732633591,
+    -0.25665807723999023,
+    -0.2400297075510025,
+    0.055859118700027466,
+    -0.24934203922748566,
+    -0.05649476498365402,
+    -0.021823860704898834,
+    0.07491856813430786,
+    0.028743356466293335,
+    0.21002137660980225,
+    -0.5215728282928467,
+    0.05622958019375801,
+    -0.2222532033920288,
+    0.1794230341911316,
+    0.11855436116456985,
+    0.14668777585029602,
+    0.45487338304519653,
+    -0.1859143078327179,
+    -0.05654382333159447,
+    -0.16731679439544678,
+    -0.1562391221523285,
+    0.16424456238746643,
+    0.2154158502817154,
+    0.3380601406097412,
+    0.12264357507228851,
+    0.3392817974090576,
+    -0.060549046844244,
+    -0.14765986800193787,
+    0.11267101764678955,
+    -0.24056652188301086,
+    0.03510596603155136,
+    0.10618807375431061,
+    -0.15641556680202484,
+    -0.24543322622776031,
+    -0.19173413515090942,
+    -0.011205855756998062,
+    0.24790503084659576,
+    0.32398396730422974,
+    0.22276073694229126,
+    0.018482085317373276,
+    -0.03579630330204964,
+    0.05034150183200836,
+    0.29536929726600647,
+    -0.050280749797821045,
+    -0.014656215906143188,
+    -0.3677038550376892,
+    0.41170692443847656,
+    0.15874658524990082,
+    0.34870871901512146,
+    -0.23689004778862,
+    0.2970763146877289,
+    0.0950806587934494,
+    -0.1269141584634781,
+    -0.11931035667657852,
+    0.13633345067501068,
+    0.42843636870384216,
+    0.03449300676584244,
+    0.4283212721347809,
+    0.2762477397918701,
+    0.1679811179637909,
+    0.2898493707180023,
+    -0.04722334071993828,
+    -0.047664619982242584,
+    -0.22933121025562286
+  ],
+  "Marry_Female_EN_US": [
+    0.10095467418432236,
+    0.046844299882650375,
+    0.05421638488769531,
+    -0.09417131543159485,
+    -0.18054454028606415,
+    0.0935884565114975,
+    -0.11312611401081085,
+    0.02784895896911621,
+    0.13980317115783691,
+    -0.08165936917066574,
+    0.10532249510288239,
+    0.09783805906772614,
+    0.01645722985267639,
+    0.04216833412647247,
+    -0.1025347113609314,
+    0.09854228794574738,
+    0.22359934449195862,
+    0.08323220163583755,
+    0.003406684845685959,
+    0.30394530296325684,
+    0.3451034426689148,
+    -0.29881906509399414,
+    -0.08311712741851807,
+    -0.109955795109272,
+    0.07522188872098923,
+    -0.38127946853637695,
+    0.029290571808815002,
+    -0.012949233874678612,
+    0.22986799478530884,
+    -0.22929272055625916,
+    -0.11333343386650085,
+    0.1066955029964447,
+    -0.03432668745517731,
+    -0.10237376391887665,
+    -0.11407271027565002,
+    -0.01221979409456253,
+    0.19828736782073975,
+    -0.08432801812887192,
+    -0.07885870337486267,
+    -0.09633795917034149,
+    0.07740725576877594,
+    -0.14024686813354492,
+    0.007659006863832474,
+    -0.061528440564870834,
+    -0.15116117894649506,
+    -0.9346913695335388,
+    -0.19321316480636597,
+    0.09346023201942444,
+    0.008720653131604195,
+    0.00935569778084755,
+    0.059522844851017,
+    -0.0004963874816894531,
+    -0.11127720773220062,
+    -0.015941940248012543,
+    0.11759459227323532,
+    -0.38565748929977417,
+    0.014210086315870285,
+    -0.4402802586555481,
+    -0.03058554232120514,
+    -0.15320685505867004,
+    0.925262451171875,
+    0.05237797647714615,
+    -0.06457516551017761,
+    0.04277027025818825,
+    -0.09071764349937439,
+    -0.023430675268173218,
+    0.018660694360733032,
+    0.28416356444358826,
+    0.15927383303642273,
+    -0.036094918847084045,
+    -0.18289241194725037,
+    -0.16174408793449402,
+    0.1352432817220688,
+    -0.11155793070793152,
+    -0.21458107233047485,
+    -0.007756996899843216,
+    -0.17188167572021484,
+    -0.014599844813346863,
+    0.03282542526721954,
+    0.10045303404331207,
+    0.11301460862159729,
+    -0.04795491322875023,
+    -0.05172593891620636,
+    0.11332973837852478,
+    0.07555423676967621,
+    0.10994540899991989,
+    -0.07060486078262329,
+    0.004575258120894432,
+    0.11668689548969269,
+    0.10401762276887894,
+    -0.6524990797042847,
+    -0.2616415023803711,
+    -0.2577282190322876,
+    0.013369720429182053,
+    0.015914201736450195,
+    -0.1528300940990448,
+    0.06751468777656555,
+    -0.2593517601490021,
+    -0.04114726930856705,
+    -0.12167482078075409,
+    -0.016297057271003723,
+    0.050480689853429794,
+    0.08136023581027985,
+    0.10589707642793655,
+    0.0009140158072113991,
+    0.09637215733528137,
+    0.13627931475639343,
+    -0.08097885549068451,
+    -0.04615870863199234,
+    -0.3712182939052582,
+    0.17477694153785706,
+    -0.03713107109069824,
+    0.3938117027282715,
+    -0.16526120901107788,
+    0.046960875391960144,
+    0.13634932041168213,
+    -0.2535812258720398,
+    -0.004662476480007172,
+    -0.1287195086479187,
+    0.04682536423206329,
+    -0.0553663894534111,
+    -0.007208423689007759,
+    -0.03398251533508301,
+    0.04032691568136215,
+    0.10826413333415985,
+    -0.3297663927078247,
+    0.12369295954704285,
+    -0.22297564148902893,
+    0.016204068437218666,
+    0.22467367351055145,
+    0.06251698732376099,
+    0.25536197423934937,
+    -0.0313156396150589,
+    0.23897168040275574,
+    -0.125069260597229,
+    0.05682749301195145,
+    0.2709246873855591,
+    0.11623440682888031,
+    -0.08916947990655899,
+    -0.0015965849161148071,
+    0.021189596503973007,
+    0.1729092001914978,
+    -0.20169132947921753,
+    -0.010327596217393875,
+    -0.036886122077703476,
+    0.01917070895433426,
+    0.18902111053466797,
+    0.5179728269577026,
+    0.31896597146987915,
+    -0.7427007555961609,
+    -0.4137954115867615,
+    0.06960596889257431,
+    0.06620097160339355,
+    -0.1536514014005661,
+    0.1503698229789734,
+    -0.14556577801704407,
+    0.14588545262813568,
+    -0.18597960472106934,
+    -0.342746764421463,
+    0.05013357102870941,
+    -0.02868656814098358,
+    -0.26822707056999207,
+    -0.3400660753250122,
+    -0.02838587388396263,
+    0.004168674349784851,
+    -0.17664480209350586,
+    0.020639866590499878,
+    0.14682283997535706,
+    -0.2350459098815918,
+    -0.19191408157348633,
+    -0.1417776346206665,
+    -0.15922360122203827,
+    -0.2131480872631073,
+    -0.07812295854091644,
+    -0.22641682624816895,
+    -0.08451198041439056,
+    -0.27697646617889404,
+    -0.16417096555233002,
+    0.17140953242778778,
+    -0.014865081757307053,
+    0.1978265941143036,
+    0.17148619890213013,
+    0.051121003925800323,
+    -0.0503043532371521,
+    0.4206354022026062,
+    -0.6276400089263916,
+    0.19541236758232117,
+    -0.05150662735104561,
+    0.20107480883598328,
+    0.30449116230010986,
+    -0.2169422060251236,
+    0.07420805841684341,
+    0.19388242065906525,
+    -0.09795433282852173,
+    -0.3506614565849304,
+    0.29341161251068115,
+    0.036286331713199615,
+    0.2008945643901825,
+    -0.13470220565795898,
+    -0.3342384099960327,
+    0.049659550189971924,
+    -0.23403503000736237,
+    -0.2438443899154663,
+    -0.0661768913269043,
+    -0.15710312128067017,
+    0.06331466138362885,
+    0.1287412941455841,
+    0.2185135930776596,
+    0.2592580318450928,
+    -0.25041234493255615,
+    0.04421650990843773,
+    0.031913772225379944,
+    0.1675594300031662,
+    0.1488073468208313,
+    -0.14290811121463776,
+    0.3285759687423706,
+    0.22858765721321106,
+    0.3819029927253723,
+    -0.04735409840941429,
+    -0.18470162153244019,
+    0.2156306505203247,
+    -0.262035995721817,
+    0.1106458529829979,
+    -0.33096078038215637,
+    -0.40564554929733276,
+    -0.1130962148308754,
+    -0.4035260081291199,
+    0.009994406253099442,
+    0.19823773205280304,
+    0.09995241463184357,
+    0.10737255960702896,
+    0.16501155495643616,
+    -0.21067723631858826,
+    -0.011145517230033875,
+    0.013040252029895782,
+    -0.3106451630592346,
+    -0.048852063715457916,
+    -0.2291208952665329,
+    0.28707051277160645,
+    0.11026108264923096,
+    0.5360386967658997,
+    -0.1761239767074585,
+    0.07656016945838928,
+    -0.07331065833568573,
+    -0.47247397899627686,
+    -0.21432432532310486,
+    -0.21592572331428528,
+    0.6710861921310425,
+    0.11024707555770874,
+    0.19684234261512756,
+    0.2528229355812073,
+    0.21830880641937256,
+    0.1830369234085083,
+    0.07172520458698273,
+    0.24994215369224548,
+    -0.14005254209041595
+  ],
+  "Samuel_Male_EN_US": [
+    -0.12619435787200928,
+    -0.11846257001161575,
+    0.04108911007642746,
+    -0.10919006168842316,
+    -0.18582119047641754,
+    0.3603861629962921,
+    -0.08595605194568634,
+    -0.02000698447227478,
+    0.19657589495182037,
+    0.1481103152036667,
+    0.15841630101203918,
+    0.13725560903549194,
+    -0.023550238460302353,
+    0.11064086854457855,
+    0.0004522055387496948,
+    0.039599962532520294,
+    0.03390733152627945,
+    -0.0010563544929027557,
+    -0.06491883099079132,
+    0.21764393150806427,
+    0.16938678920269012,
+    0.11513420194387436,
+    0.08827359974384308,
+    -0.0926792100071907,
+    0.14648687839508057,
+    -0.21553286910057068,
+    0.023113107308745384,
+    -0.121593177318573,
+    0.11240999400615692,
+    -0.12347493320703506,
+    -0.18039049208164215,
+    0.1588599681854248,
+    -0.17081257700920105,
+    -0.2037820667028427,
+    -0.1563880443572998,
+    -0.0917324647307396,
+    0.03558758646249771,
+    -0.11815845966339111,
+    -0.20688572525978088,
+    -0.0739545151591301,
+    -0.0853046327829361,
+    -0.21343617141246796,
+    0.04951489716768265,
+    -0.44510698318481445,
+    -0.09933532774448395,
+    -0.8458589911460876,
+    -0.16738075017929077,
+    0.16498824954032898,
+    0.01717434823513031,
+    -0.06231179088354111,
+    0.16252321004867554,
+    0.04281383752822876,
+    -0.12609757483005524,
+    0.05567052215337753,
+    -0.027309387922286987,
+    -0.3515397310256958,
+    0.04615774750709534,
+    -0.6998573541641235,
+    0.06659585237503052,
+    -0.13769188523292542,
+    0.6031708717346191,
+    0.011859655380249023,
+    -0.33847033977508545,
+    -0.12553080916404724,
+    -0.18536464869976044,
+    -0.007513280957937241,
+    0.519316554069519,
+    0.23655596375465393,
+    0.24828562140464783,
+    0.13927540183067322,
+    0.14041166007518768,
+    -0.09467436373233795,
+    0.11043473333120346,
+    -0.06588282436132431,
+    -0.11430786550045013,
+    0.026140939444303513,
+    -0.2597509026527405,
+    0.24612551927566528,
+    0.10610029101371765,
+    -0.23051029443740845,
+    -0.2547153830528259,
+    -0.002995643764734268,
+    0.17297737300395966,
+    0.03597598150372505,
+    0.15152215957641602,
+    -0.06193822994828224,
+    -0.07088689506053925,
+    0.03826475143432617,
+    0.08488129824399948,
+    0.2690422534942627,
+    -0.5332049131393433,
+    -0.2220773696899414,
+    -0.5455571413040161,
+    -0.09902779757976532,
+    0.21370843052864075,
+    -0.14772546291351318,
+    0.17388656735420227,
+    -0.12971428036689758,
+    0.019031524658203125,
+    -0.03160820156335831,
+    0.002624817192554474,
+    0.023578234016895294,
+    0.12251117080450058,
+    -0.0782250463962555,
+    0.03130299225449562,
+    0.11962416768074036,
+    0.23477508127689362,
+    0.07299475371837616,
+    -0.0542200468480587,
+    -0.4101184606552124,
+    0.08486519753932953,
+    -0.04950159043073654,
+    0.44324159622192383,
+    -0.20634889602661133,
+    0.059250980615615845,
+    -0.016344502568244934,
+    -0.10346954315900803,
+    0.05175183713436127,
+    0.10785773396492004,
+    -0.13179540634155273,
+    -0.21312423050403595,
+    0.08348912745714188,
+    -0.1100846529006958,
+    -0.0912843644618988,
+    0.16458404064178467,
+    -0.22458869218826294,
+    0.057109322398900986,
+    -0.14130070805549622,
+    -0.27792462706565857,
+    0.2201579511165619,
+    0.14192339777946472,
+    0.370261013507843,
+    -0.1133350059390068,
+    0.5210106372833252,
+    -0.2802170515060425,
+    0.2284509539604187,
+    0.08346766233444214,
+    0.2824746072292328,
+    -0.2277216911315918,
+    0.28492996096611023,
+    0.13533133268356323,
+    0.17641571164131165,
+    0.055212635546922684,
+    -0.1420554518699646,
+    -0.3370305895805359,
+    -0.05939646065235138,
+    -0.012498218566179276,
+    0.3336101472377777,
+    -0.009854704141616821,
+    -0.21461555361747742,
+    -0.06897716224193573,
+    0.42768532037734985,
+    0.2918033301830292,
+    0.17972910404205322,
+    0.08387543261051178,
+    -0.3834030032157898,
+    0.04259374737739563,
+    -0.24948984384536743,
+    -0.4711057245731354,
+    0.23269453644752502,
+    -0.11432869732379913,
+    -0.3319881558418274,
+    -0.5810990333557129,
+    -0.16500617563724518,
+    -0.513327419757843,
+    -0.22280623018741608,
+    -0.06446528434753418,
+    0.053828924894332886,
+    -0.3869067430496216,
+    -0.18398475646972656,
+    -0.39183861017227173,
+    -0.1645130217075348,
+    -0.10562220215797424,
+    0.09369634836912155,
+    -0.10099713504314423,
+    -0.2670536935329437,
+    -0.3405263423919678,
+    -0.4056051969528198,
+    0.4251013398170471,
+    -0.06249173730611801,
+    0.045685671269893646,
+    0.4523245692253113,
+    0.03759829327464104,
+    -0.17996317148208618,
+    0.4454271197319031,
+    -0.9131506681442261,
+    0.12587328255176544,
+    0.03575079143047333,
+    0.1569804549217224,
+    -0.005473516881465912,
+    -0.20985543727874756,
+    0.02623889595270157,
+    0.27007320523262024,
+    -0.10873401910066605,
+    0.05130569636821747,
+    -0.0011592544615268707,
+    -0.0032558459788560867,
+    0.3350878059864044,
+    0.14362351596355438,
+    -0.046291008591651917,
+    0.1339847445487976,
+    0.054939839988946915,
+    -0.20351824164390564,
+    0.13673129677772522,
+    -0.5390269160270691,
+    0.20881281793117523,
+    -0.12060403823852539,
+    0.08859001100063324,
+    -0.025972001254558563,
+    -0.4430112838745117,
+    -0.004151973873376846,
+    0.18464192748069763,
+    0.3526911437511444,
+    -0.09955041110515594,
+    0.10740470886230469,
+    0.35204750299453735,
+    0.013931604102253914,
+    0.5496764183044434,
+    0.004158753901720047,
+    0.2092914879322052,
+    0.09148923307657242,
+    -0.2677595019340515,
+    0.044244516640901566,
+    0.25607749819755554,
+    -0.3345734179019928,
+    -0.251026451587677,
+    -0.5243716239929199,
+    -0.07589935511350632,
+    0.2236466407775879,
+    0.692629337310791,
+    0.30974945425987244,
+    -0.05844153091311455,
+    0.03120841458439827,
+    0.06444196403026581,
+    -0.04019276052713394,
+    0.20844346284866333,
+    0.02490842342376709,
+    -0.4918057918548584,
+    0.17454466223716736,
+    0.008414536714553833,
+    0.3902796804904938,
+    -0.358223557472229,
+    0.03861651197075844,
+    0.18186673521995544,
+    -0.527253270149231,
+    -0.13051800429821014,
+    0.05706249922513962,
+    0.6442924737930298,
+    0.1802804172039032,
+    0.41333693265914917,
+    0.23335036635398865,
+    0.1602075695991516,
+    0.05822325870394707,
+    0.07076271623373032,
+    0.1175406351685524,
+    -0.11005706340074539
+  ],
+  "Peter_Male_EN_US": [
+    -0.02671806514263153,
+    -0.1403174251317978,
+    0.05001065880060196,
+    -0.1078786849975586,
+    -0.11178930848836899,
+    0.35923928022384644,
+    -0.10216598212718964,
+    -0.0744534507393837,
+    0.1513819694519043,
+    0.033666566014289856,
+    0.18034929037094116,
+    0.17279928922653198,
+    0.025053754448890686,
+    -0.041277479380369186,
+    -0.017297936603426933,
+    0.089497409760952,
+    0.09309914708137512,
+    -0.013885125517845154,
+    0.006094053387641907,
+    0.17924150824546814,
+    0.24564367532730103,
+    0.0279664546251297,
+    -0.0281650610268116,
+    -0.21447591483592987,
+    0.05961482226848602,
+    -0.24900272488594055,
+    0.08882076293230057,
+    -0.08165736496448517,
+    0.10724847763776779,
+    -0.12638989090919495,
+    -0.17386965453624725,
+    0.13405269384384155,
+    -0.34381765127182007,
+    -0.14751896262168884,
+    -0.22588366270065308,
+    -0.1478700041770935,
+    0.17554007470607758,
+    -0.06141895055770874,
+    -0.08225107192993164,
+    -0.11144131422042847,
+    -0.06510056555271149,
+    -0.28190499544143677,
+    0.038482390344142914,
+    -0.27125123143196106,
+    -0.18446698784828186,
+    -0.9286922812461853,
+    -0.22221821546554565,
+    0.1890208125114441,
+    -0.019690819084644318,
+    -0.0410115122795105,
+    0.020294375717639923,
+    0.027921512722969055,
+    -0.1279190480709076,
+    0.147661954164505,
+    -0.06851989030838013,
+    -0.3743244409561157,
+    0.16605032980442047,
+    -0.6898317337036133,
+    0.1322338581085205,
+    -0.24458415806293488,
+    0.8091621398925781,
+    -0.0020183511078357697,
+    -0.2860056161880493,
+    -0.101268470287323,
+    -0.12195206433534622,
+    -0.0030680038034915924,
+    0.35305753350257874,
+    0.18254509568214417,
+    0.15022733807563782,
+    0.05763908848166466,
+    0.05388329550623894,
+    -0.1870671808719635,
+    0.16868555545806885,
+    -0.005561813712120056,
+    -0.038571834564208984,
+    0.04676483944058418,
+    -0.30886393785476685,
+    0.167225182056427,
+    0.09490098804235458,
+    -0.1838131844997406,
+    -0.16072562336921692,
+    -0.07846863567829132,
+    -0.02979177236557007,
+    0.11231174319982529,
+    0.12191181629896164,
+    0.0011293292045593262,
+    0.00034183263778686523,
+    0.03643456846475601,
+    -0.004104208201169968,
+    0.0210350900888443,
+    -0.5258797407150269,
+    -0.28434744477272034,
+    -0.3846287727355957,
+    0.028454381972551346,
+    0.268708735704422,
+    -0.1812271922826767,
+    0.14686831831932068,
+    -0.19878965616226196,
+    0.014050750061869621,
+    -0.08926080912351608,
+    0.005555577576160431,
+    0.031308822333812714,
+    0.06296883523464203,
+    -0.030255869030952454,
+    0.014618240296840668,
+    0.15111848711967468,
+    0.08761651813983917,
+    0.017063427716493607,
+    -0.10042140632867813,
+    -0.5389280915260315,
+    0.01812286674976349,
+    -0.05892270803451538,
+    0.6147855520248413,
+    -0.2805640995502472,
+    0.04736167937517166,
+    0.08966498076915741,
+    -0.21231184899806976,
+    0.029807910323143005,
+    0.09918596595525742,
+    -0.030193090438842773,
+    -0.18508130311965942,
+    0.03303778916597366,
+    -0.04084295034408569,
+    -0.09405478835105896,
+    0.08350837975740433,
+    -0.2915036678314209,
+    -0.029322421178221703,
+    -0.2637880742549896,
+    -0.30284351110458374,
+    0.25083762407302856,
+    -0.05462878942489624,
+    0.437039852142334,
+    0.19965985417366028,
+    0.10523413866758347,
+    -0.10033008456230164,
+    0.08067487925291061,
+    0.4820234775543213,
+    0.572240948677063,
+    0.013404153287410736,
+    -0.18431192636489868,
+    0.049913421273231506,
+    0.14440348744392395,
+    0.010796692222356796,
+    -0.04425942152738571,
+    -0.034714244306087494,
+    0.09812092781066895,
+    0.22658464312553406,
+    0.5398628115653992,
+    0.14926102757453918,
+    -0.09294281154870987,
+    -0.29379820823669434,
+    0.2622401714324951,
+    -0.09194155037403107,
+    -0.007996812462806702,
+    0.16660672426223755,
+    -0.4457249045372009,
+    -0.1516806185245514,
+    -0.270463764667511,
+    -0.39716261625289917,
+    -0.1890673041343689,
+    -0.21207217872142792,
+    -0.24664843082427979,
+    -0.284817099571228,
+    -0.16662172973155975,
+    -0.1995084583759308,
+    0.07401363551616669,
+    0.006268583238124847,
+    -0.1385871022939682,
+    -0.24141649901866913,
+    -0.1358436644077301,
+    -0.268524706363678,
+    -0.13288292288780212,
+    -0.09328028559684753,
+    -0.037290073931217194,
+    -0.05488095059990883,
+    -0.03209332749247551,
+    -0.7609561085700989,
+    -0.2482100874185562,
+    0.23829908668994904,
+    -0.03605654835700989,
+    0.26974982023239136,
+    0.4465060234069824,
+    -0.1619127094745636,
+    -0.08157207816839218,
+    0.6551785469055176,
+    -0.6885366439819336,
+    0.26440972089767456,
+    -0.21962705254554749,
+    0.009923815727233887,
+    -0.02745674178004265,
+    -0.25602495670318604,
+    0.16939014196395874,
+    0.22960194945335388,
+    -0.113400399684906,
+    0.1810494065284729,
+    0.09377805143594742,
+    0.24526025354862213,
+    0.5957650542259216,
+    -0.1350148618221283,
+    -0.17307522892951965,
+    0.4434152841567993,
+    -0.06832988560199738,
+    -0.07215136289596558,
+    -0.18067269027233124,
+    -0.2654397487640381,
+    0.22328779101371765,
+    -0.1092233955860138,
+    0.001668304204940796,
+    0.05992841348052025,
+    -0.2866281270980835,
+    -0.0010518133640289307,
+    0.3629896640777588,
+    0.026638980954885483,
+    -0.3393930196762085,
+    0.162781223654747,
+    -0.21661463379859924,
+    -0.12741011381149292,
+    0.21406680345535278,
+    -0.16890040040016174,
+    0.014303475618362427,
+    0.1664152592420578,
+    -0.1924239546060562,
+    0.42883533239364624,
+    0.04888498783111572,
+    -0.5888325572013855,
+    -0.21445952355861664,
+    -0.2693358361721039,
+    -0.2160845249891281,
+    -0.18929903209209442,
+    0.29195863008499146,
+    0.08952829986810684,
+    -0.1636795699596405,
+    0.24474045634269714,
+    -0.28684142231941223,
+    -0.17337173223495483,
+    0.004411667585372925,
+    0.01754007488489151,
+    -0.5038971304893494,
+    0.2621816396713257,
+    -0.01969096064567566,
+    0.31702128052711487,
+    -0.40279054641723633,
+    -0.11601737886667252,
+    -0.014087185263633728,
+    -0.7800108194351196,
+    -0.11777613312005997,
+    -0.08311420679092407,
+    0.38773632049560547,
+    0.14981284737586975,
+    0.2543047368526459,
+    0.44576597213745117,
+    0.14574092626571655,
+    0.17641997337341309,
+    0.1667698472738266,
+    0.04656987264752388,
+    -0.36774906516075134
+  ],
+  "Jack_Male_EN_US": [
+    -0.04046084274887107,
+    -0.15095742102712392,
+    -0.006839682068675756,
+    -0.05994556490331888,
+    0.006684107612818471,
+    0.21780437231063843,
+    -0.05186830650782213,
+    0.045858974114526066,
+    0.18578437742544338,
+    0.23851692618336529,
+    0.25875567211769523,
+    0.01337982285767794,
+    0.0575837992830202,
+    0.010126538737677035,
+    -0.11069863769225777,
+    0.10501059270463883,
+    -0.038009885698556914,
+    0.09259203597903251,
+    0.006737061683088536,
+    0.033672554418444633,
+    0.12799083036952652,
+    -0.021536123100668186,
+    0.03342777863144876,
+    -0.17997418325394393,
+    0.017667141649872063,
+    -0.0656133412849158,
+    0.07788868229836225,
+    -0.20092849116772413,
+    -0.013813202735036612,
+    -0.15624882429838183,
+    -0.02039524572901428,
+    0.06695615525532048,
+    -0.03102828292176127,
+    -0.003229711949825284,
+    -0.16995424311608076,
+    0.021083407942205673,
+    0.0890236813283991,
+    -0.00943179002497345,
+    -0.016149783227592696,
+    0.09971937909722328,
+    -0.09221453487407416,
+    -0.15833347449079158,
+    -0.16861484068795107,
+    -0.19585764929652213,
+    0.019861079845577473,
+    -0.5355675680562854,
+    -0.03166075125336647,
+    0.27058335933834315,
+    -0.015265139937400807,
+    -0.014771698974072935,
+    0.0019185470417142012,
+    0.05587198091670871,
+    -0.09576068501919509,
+    -0.021184422494843605,
+    -0.17917617466300725,
+    -0.17127673390787096,
+    0.20629609785974026,
+    -0.18852811860851945,
+    0.012637676135636867,
+    -0.05553631568327547,
+    0.446656902320683,
+    -0.08865800648927688,
+    -0.0358334188349545,
+    -0.010281644179485738,
+    -0.07811169922351838,
+    0.109587676031515,
+    0.2809453630819917,
+    0.11856715977191926,
+    0.05583547845017166,
+    -0.12626958163455126,
+    0.1281449523754418,
+    -0.06413849201053382,
+    0.15792119931429624,
+    0.03743950969073922,
+    0.039248654276161685,
+    0.025773864006623626,
+    -0.11292729973793031,
+    0.20113790165632964,
+    -0.16639176942408085,
+    -0.08434787057340147,
+    -0.09793199738487603,
+    0.006178376311436293,
+    -0.013375372998416409,
+    0.0542846780270338,
+    0.10225461367517709,
+    0.039542005566181614,
+    0.039125220011919745,
+    -0.026181252760579806,
+    0.033635431178845474,
+    0.0862573640421033,
+    -0.16706872563809155,
+    -0.17828714922070504,
+    -0.12449762681499123,
+    -0.03108778726309538,
+    0.34800285045057533,
+    -0.09101906679570675,
+    0.10009890785440802,
+    -0.09514827439561487,
+    -0.034053249843418586,
+    -0.1105702156201005,
+    0.044860146183054894,
+    0.1358713038265705,
+    0.08085576379671693,
+    -0.08721686480566859,
+    0.0573709562420845,
+    0.11415926301851868,
+    0.245212434977293,
+    0.10061340439133346,
+    -0.14243060679873454,
+    -0.29007883500307796,
+    0.2037310192361474,
+    -0.07627206621691585,
+    0.39687543553300203,
+    -0.05620414759032429,
+    0.09882601262070238,
+    -0.0677183491177857,
+    -0.006265022698789827,
+    0.023330946313217284,
+    0.165302169136703,
+    0.02257582815364003,
+    -0.12311515075853095,
+    -0.1469769559800625,
+    0.012314948812127115,
+    -0.021668313816189756,
+    0.34593041818588977,
+    -0.04288801420480014,
+    -0.16514885276556016,
+    -0.18242249870672822,
+    -0.0026492349803447918,
+    -0.07602755706757307,
+    0.3050278574228287,
+    0.05592154124751687,
+    0.23462600968778133,
+    0.04983584135770798,
+    -0.278392353374511,
+    0.22432250184938313,
+    0.2686607390176505,
+    0.09875037595629692,
+    0.1183511849027127,
+    -0.054236005432903786,
+    0.08722816870140376,
+    0.03687728531658649,
+    0.02554770354181528,
+    0.12954192645847795,
+    -0.17979382164776325,
+    0.3348038989119232,
+    0.4423634188249707,
+    0.4926473870873451,
+    0.20223852992057798,
+    0.025749067217111565,
+    0.17501560486853124,
+    0.38072580406442286,
+    -0.2901147920638323,
+    0.20979762431234122,
+    0.2653069855645299,
+    -0.3772167260758579,
+    -0.09222150184214115,
+    -0.39089010236784816,
+    0.20441576987504956,
+    -0.23905933797359466,
+    -0.10717785591259599,
+    -0.16813391000032427,
+    -0.3918773714452982,
+    0.22002617083489895,
+    -0.07564694900065663,
+    0.07937551865907153,
+    0.08824989823624493,
+    0.29479082534089684,
+    -0.36032405607402324,
+    -0.2603407606482506,
+    -0.6042231546249242,
+    -0.28955514542758465,
+    -0.03436102028936147,
+    -0.04779914440587163,
+    -0.009762127837166193,
+    -0.10187441464513541,
+    -0.7215779416263104,
+    -0.27634545378386977,
+    0.17830019462853672,
+    -0.09875116832554341,
+    0.09090687595307827,
+    0.13431233698502182,
+    -0.2642242418602109,
+    -0.04540154328569772,
+    0.4095670249313116,
+    -0.35262783467769626,
+    0.16893002432771026,
+    0.08783781807869673,
+    0.25395081788301466,
+    -0.28145490009337665,
+    -0.326755971997045,
+    0.15969305289909244,
+    0.028953553736209857,
+    -0.21651662606745958,
+    0.4146030018106103,
+    -0.03423323482275009,
+    -0.16287488527595997,
+    0.39450788777321577,
+    0.12951190834864976,
+    0.1056388876902929,
+    0.08080296535044909,
+    0.3574946537613869,
+    0.019849372282624234,
+    0.040822872455464676,
+    -0.2921319276094437,
+    0.2146362524013966,
+    -0.134716684371233,
+    0.07487714374437929,
+    0.009296230599284167,
+    -0.04380686506628992,
+    -0.19373088590800763,
+    0.2639585494995117,
+    0.18104018196463584,
+    -0.16598513473290952,
+    0.04853666033595801,
+    -0.14947416950017212,
+    -0.21553540676832197,
+    0.08810192588716745,
+    -0.2630701248068362,
+    0.20122109837830066,
+    0.12282457035034895,
+    -0.37653126297518613,
+    0.23556544631719586,
+    0.10754718948155645,
+    -0.19623969851527362,
+    -0.37442944198846817,
+    -0.24549162713810802,
+    -0.12071249298751355,
+    0.07532338486053047,
+    0.4720609566196799,
+    0.33625265657901765,
+    0.09440774954855442,
+    0.08528476338833571,
+    -0.1694023549556732,
+    -0.07463834583759302,
+    0.11786841377615931,
+    -0.09039792915573344,
+    -0.5210130661725998,
+    0.24727064482867717,
+    -0.16189097724854945,
+    0.4837450442370027,
+    -0.1521998167037964,
+    -0.06750598764047028,
+    -0.004553849250078207,
+    -0.22957104928791522,
+    -0.017605035123415283,
+    0.2760094117373228,
+    0.05957211840432136,
+    -0.0009269976260838989,
+    0.1953226298559457,
+    0.223721924982965,
+    0.051529260072857144,
+    0.07620697033125907,
+    0.3487104419618845,
+    -0.29083244649809786,
+    -0.2311014744453132
+  ],
+  "Henry_Male_EN_US": [
+    0.03231465221033432,
+    -0.17197506912052632,
+    -0.15499479230493307,
+    -0.1250289712101221,
+    -0.16165112853050234,
+    0.11880796402692793,
+    -0.026341438165400174,
+    0.02843754307832569,
+    0.15634612458525227,
+    0.10595979747595265,
+    0.039114803401753315,
+    0.010465619154274462,
+    0.05449719361495225,
+    0.16031873143510894,
+    -0.1695658477488905,
+    -0.0018680405337363482,
+    -0.08171003330498933,
+    0.023001285642385474,
+    -0.1536627289839089,
+    0.011052230559289444,
+    0.11655736799002625,
+    -0.15112714925780893,
+    -0.010360681824386115,
+    -0.11909673623740674,
+    -0.21134809795767068,
+    0.20729044654872264,
+    0.19189890939742327,
+    0.034412209317088105,
+    0.07652324698865413,
+    -0.09826281983405351,
+    -0.03507392066530883,
+    0.05623610065958929,
+    0.023545358702540417,
+    0.1944381920620799,
+    -0.18579835649579762,
+    0.13530588764697313,
+    0.047002217611589,
+    -0.03857994754798711,
+    0.21279680896550413,
+    -0.0011355822905898053,
+    -0.11157526604365556,
+    -0.2419595753774047,
+    -0.25546876950538716,
+    0.03623581342399121,
+    -0.2026651309803128,
+    -0.5091725187376142,
+    0.11082670092582703,
+    0.1577077390626073,
+    -0.3597980797290802,
+    -0.011146663455292584,
+    0.08107478127349169,
+    0.22176175282802432,
+    -0.1103294763015583,
+    -0.15145504621323197,
+    -0.2445066723972559,
+    -0.1946099933935329,
+    0.256573729775846,
+    0.079415076575242,
+    0.07555773077765479,
+    -0.1760544968303293,
+    0.45968954982236027,
+    -0.2993650084361434,
+    0.23150971811264753,
+    -0.007668233546428378,
+    0.014109187014400976,
+    0.10277328002266586,
+    0.06715730596333742,
+    -0.1751516081392765,
+    0.13003269975306464,
+    -0.01215957077220084,
+    -0.10042227925732733,
+    0.13036054857075213,
+    0.008775722794234747,
+    0.07856735654640942,
+    -0.0694429306051461,
+    -0.04239498968236148,
+    -0.04804698210209607,
+    0.25152273084968324,
+    -0.1492430506274104,
+    0.16119943596422673,
+    0.33940611872822046,
+    0.08957686950452626,
+    -0.0576036872342229,
+    -0.08172749504446983,
+    -0.1345309724099934,
+    0.2364609685027972,
+    0.10072718104347586,
+    0.003340821829624467,
+    -0.04597767407540229,
+    0.018918244726955885,
+    -0.11239160280674693,
+    -0.07376309838145972,
+    0.12267176546156411,
+    -0.07140531631885097,
+    0.06295341402292251,
+    0.14427131954580547,
+    0.06250183843076229,
+    -0.1606520629953593,
+    -0.027804251573979866,
+    -0.14481351664289832,
+    0.11379512277198955,
+    0.4806660775095224,
+    -0.08994001736864446,
+    0.00915752639994026,
+    -0.11584404325112699,
+    0.14220867762342096,
+    0.2804017297923565,
+    0.18867827099747958,
+    -0.3128292956738733,
+    -0.06684039272367953,
+    0.21606469061225653,
+    -0.1878887706901878,
+    0.32509690122678875,
+    0.1665364772081375,
+    0.05110035295365378,
+    -0.24309139875695107,
+    -0.14905344853177668,
+    0.014429311221465464,
+    -0.04873416791670025,
+    0.14548272781539706,
+    -0.08988374508335253,
+    -0.21444802945479752,
+    0.058564639464020726,
+    0.2721280400641262,
+    0.205003977753222,
+    -0.010262779332697397,
+    -0.0029813522472977617,
+    -0.15529807284474373,
+    0.26777649037539963,
+    -0.0713763989508152,
+    0.3913366436958313,
+    0.24202184118330478,
+    0.07944705467671154,
+    0.4742859028279781,
+    -0.024598410213366118,
+    -0.18506417192984376,
+    0.4029238263145089,
+    0.08335387408733368,
+    0.10739199253730473,
+    -0.13465733248740436,
+    -0.01804239540069829,
+    -0.0791158242151141,
+    0.19488584250211716,
+    0.02578564528375863,
+    -0.5237501479685307,
+    -0.07989736758172511,
+    0.5030703557655215,
+    0.10675456821918486,
+    0.2127919055521488,
+    0.09851150363683697,
+    -0.08418610896915199,
+    0.1229889305308461,
+    -0.43523957654833795,
+    0.4120137566700578,
+    0.4287545826286077,
+    0.2022662471514195,
+    -0.20844841264188288,
+    -0.35773375257849693,
+    0.08517274558544158,
+    -0.1674375392496586,
+    -0.014676835667341941,
+    -0.05585243180394173,
+    -0.5275113929063082,
+    0.630928717367351,
+    -0.01564715448766945,
+    -0.23996227737661685,
+    0.45196260644588615,
+    0.5890609898604452,
+    -0.6399751238524913,
+    -0.15936621921136973,
+    -0.7311209061648697,
+    0.2599357020109892,
+    -0.31862640250474217,
+    -0.04317740127444269,
+    -0.11731456399429589,
+    -0.25942440861836075,
+    -0.2941827371716499,
+    0.23730804622173307,
+    0.46029331209138036,
+    0.38680253662168984,
+    -0.047635487094521534,
+    0.09478947315365074,
+    -0.3081191055476665,
+    -0.1648157351184636,
+    0.6156397035345436,
+    -0.13436328768730166,
+    0.2214298250619322,
+    -0.13365367855876686,
+    0.5899101013317705,
+    -0.43101135436445476,
+    0.1790693347575143,
+    -0.07457639621570705,
+    -0.20997890923172235,
+    -0.12710947534069417,
+    0.6243484194390476,
+    -0.7802158012986182,
+    -0.2108477063477039,
+    0.26811757814139126,
+    0.06200872911140322,
+    0.41312811655006954,
+    -0.03243043515831233,
+    0.2404710978269577,
+    -0.2426718469709158,
+    0.18551481867325492,
+    -0.1008815299719572,
+    0.30738673652522264,
+    0.04650051929056642,
+    0.04460244094952941,
+    0.23529892023652793,
+    -0.10654573403298855,
+    -0.24890044219791893,
+    -0.20255712829530237,
+    -0.02652014940977096,
+    -0.09871285315603015,
+    0.2915390910580754,
+    0.27769559375010433,
+    -0.14592748656868934,
+    0.29195716343820094,
+    -0.07187115289270878,
+    0.1771868597716093,
+    0.041152336634695516,
+    -0.1439833148382604,
+    0.30823934525251384,
+    0.3235661891289055,
+    0.1609754763310775,
+    -0.34351673722267145,
+    -0.022873798222281028,
+    0.09521509874612094,
+    0.32318700947798795,
+    0.8087762531824411,
+    0.3908264026977122,
+    0.002011305466294286,
+    0.043111137486994265,
+    0.07222179947420956,
+    -0.16160998903214935,
+    0.9778514429926872,
+    -0.1190133649390191,
+    -0.06515133678913118,
+    0.07287682355381547,
+    -0.5366707997396588,
+    0.3294163831975311,
+    0.4730239138007164,
+    0.14623114340938625,
+    -0.04914769362658263,
+    0.19938274882733825,
+    -0.042190209974069144,
+    0.5218729123473167,
+    -0.09630445644725116,
+    -0.05060101728595327,
+    0.003836261900141802,
+    0.23963055023923516,
+    0.34797419500537213,
+    -0.24325519371777776,
+    0.2133896093349904,
+    -0.5972239149094094,
+    -0.5162009912542999
+  ],
+  "Lisa_Female_EN_US": [
+    0.098259643453639,
+    -0.16734164860099554,
+    0.06323453821241856,
+    -0.11800012588500977,
+    -0.11747334823012351,
+    0.1664506748318672,
+    -0.11684786230325699,
+    0.05554657885804772,
+    0.18881248405668885,
+    0.02363725304603577,
+    0.13073167139664293,
+    0.099481688067317,
+    -0.05592308223713189,
+    0.06073833145201206,
+    -0.06404643282294273,
+    0.03512822799384594,
+    0.08353134356439113,
+    -0.041701164841651914,
+    0.027301983349025248,
+    0.20779836773872373,
+    0.18608229857636616,
+    -0.18563928958028555,
+    -0.07058929353952408,
+    -0.0963012244552374,
+    0.07196986377239227,
+    -0.36310549527406694,
+    0.02321777753531933,
+    -0.08520180359482765,
+    0.1750676281750202,
+    -0.1034254938364029,
+    -0.10941653922200203,
+    0.10243654051446356,
+    -0.047365300357341766,
+    -0.13165730237960815,
+    -0.16502732019871474,
+    0.041227119415998464,
+    0.13208873476833105,
+    -0.1005905382335186,
+    -0.134281662479043,
+    -0.03663246519863605,
+    0.019146875315345823,
+    -0.17250166907906533,
+    -0.10810867324471474,
+    -0.1385631315410137,
+    -0.07121777702122926,
+    -0.8006405025720597,
+    -0.1598912551999092,
+    0.1796777807176113,
+    0.1533122271299362,
+    -0.013358959695324302,
+    0.12085846066474915,
+    0.08968418501317502,
+    -0.1647926703095436,
+    -0.03534330911934376,
+    0.018276208639144892,
+    -0.31622386574745176,
+    0.044433788210153584,
+    -0.3835114762187004,
+    0.06946051493287086,
+    -0.14754583239555358,
+    0.7700884103775024,
+    0.05088452622294426,
+    -0.10631981901824475,
+    -0.03452881909906864,
+    -0.18428492173552513,
+    0.01940714865922928,
+    0.15734580717980862,
+    0.24781300947070123,
+    0.11008071005344391,
+    -0.037437029182910926,
+    -0.02237723711878061,
+    -0.05610082112252712,
+    0.17097073495388032,
+    -0.029630468040704724,
+    -0.14090186282992362,
+    0.05233948081731796,
+    -0.11303650513291358,
+    0.09975283332169056,
+    0.07712901234626768,
+    -0.10158926621079445,
+    -0.006173290871083734,
+    -0.05509051866829395,
+    -0.02421657182276249,
+    0.12725706659257413,
+    0.0459196088835597,
+    0.024673170596361163,
+    -0.021687139011919498,
+    -0.0033121073618531255,
+    0.16617082729935645,
+    0.1039574097841978,
+    -0.48374021649360655,
+    -0.27473467141389846,
+    -0.2696342796087265,
+    -0.0147636947222054,
+    0.07800779491662979,
+    -0.17835728526115419,
+    0.07246882431209087,
+    -0.10726579539477825,
+    -0.04813114702701569,
+    -0.05245459228754044,
+    0.021175800473429263,
+    -0.028440106660127633,
+    0.09296442177146673,
+    0.06005613040179014,
+    -0.024471254087984562,
+    0.034369273111224174,
+    0.21559504568576812,
+    -0.06217287853360176,
+    0.020450182259082794,
+    -0.42201632708311076,
+    0.18088365010917187,
+    0.06422147378325462,
+    0.42920178174972534,
+    -0.15756667256355283,
+    0.07944225370883942,
+    0.07854075198993087,
+    -0.08394828196614981,
+    -0.02590339761227369,
+    0.0945357296615839,
+    0.013894812762737276,
+    -0.0668190572410822,
+    0.03327381014823913,
+    0.017544799577444793,
+    0.03966145385056734,
+    0.185294571146369,
+    -0.30561336129903793,
+    0.05225303545594215,
+    -0.20599330589175224,
+    -0.0247444950044155,
+    0.0681127518415451,
+    0.04429299384355545,
+    0.1615323081612587,
+    -0.11333180218935013,
+    0.05444136634469032,
+    -0.32481844425201417,
+    0.18828704990446568,
+    0.2872520424425602,
+    0.13895429372787474,
+    -0.08756729066371917,
+    0.24525217991322276,
+    0.03871614711242728,
+    0.11254438832402229,
+    -0.06629508603364229,
+    -0.10699533671140671,
+    -0.049380650371313096,
+    0.06740754097700119,
+    0.16857104301452636,
+    0.30641851425170896,
+    -0.0305225610733032,
+    -0.4179070383310318,
+    -0.10720842555165291,
+    0.2560294335708022,
+    -0.019710254669189464,
+    -0.17763112932443617,
+    -0.0029114261269569397,
+    -0.31358570866286756,
+    0.041973935812711714,
+    -0.29000549390912056,
+    -0.35404677540063856,
+    -0.007181685417890549,
+    0.05955917574465275,
+    -0.1936878278851509,
+    -0.4633562132716179,
+    0.021091651916503917,
+    -0.19877577703446148,
+    -0.035990026939543895,
+    -0.13621442914009094,
+    0.14864262491464614,
+    -0.32244770638644693,
+    -0.20621291697025299,
+    -0.34765296103432775,
+    -0.24935359358787537,
+    -0.14733021520078182,
+    -0.1826939433813095,
+    -0.12082219868898392,
+    -0.2630250319838524,
+    -0.25408093333244325,
+    -0.23697019964456556,
+    0.3174678087234497,
+    -0.13980808593332766,
+    0.15056745186448098,
+    0.19160462617874144,
+    0.05726437717676161,
+    -0.010723254084587096,
+    0.3363094590604305,
+    -0.5907457679510116,
+    0.2439893047325313,
+    -0.08692961037158967,
+    0.06740072220563889,
+    0.024416530132293696,
+    -0.2389059288892895,
+    0.04350413922220468,
+    0.1647874414920807,
+    -0.22662141621112825,
+    -0.22065756656229496,
+    0.2145439937710762,
+    -0.17922353893518447,
+    0.12061219662427902,
+    0.03737899707630277,
+    -0.021762318909168236,
+    0.05909155458211899,
+    0.11406668499112128,
+    -0.33404846489429474,
+    -0.006388737261295324,
+    -0.30795534551143644,
+    0.12133514666929841,
+    0.004110211133956904,
+    0.13224451523274183,
+    0.3036839559674263,
+    -0.21054075136780737,
+    0.0030692771077156025,
+    0.0035036206245422363,
+    0.011927027255296707,
+    0.08805800816044211,
+    0.0661589827388525,
+    0.17130252979695795,
+    0.09395805150270461,
+    0.26391741409897806,
+    -0.05089263916015625,
+    -0.11061666905879974,
+    0.13363431245088578,
+    -0.30518253389745953,
+    -0.04395102113485336,
+    -0.03574146777391435,
+    -0.23613579357042908,
+    -0.16882284134626388,
+    -0.30275803729891776,
+    -0.04444010443985462,
+    0.2190699848346412,
+    0.27886907644569875,
+    0.20610505752265454,
+    0.03134636655449867,
+    -0.06447610408067703,
+    -0.0005596891045570457,
+    0.26649302095174787,
+    -0.2057398557662964,
+    0.0030920282006263733,
+    -0.4122629433870315,
+    0.3565848290920257,
+    0.20677504390478132,
+    0.3942677197046578,
+    -0.3252736687660217,
+    0.0967718569561839,
+    0.03678370863199233,
+    -0.3206644430756569,
+    -0.10442807674407958,
+    0.09797048419713975,
+    0.5563426822423935,
+    0.04038007035851478,
+    0.41352313160896303,
+    0.3068622753024101,
+    0.11289987117052078,
+    0.19880020171403884,
+    0.0596605807542801,
+    0.11166647523641585,
+    -0.09573411736637354
+  ],
+  "Anne_Female_EN_US": [
+    0.17940405994304454,
+    -0.1644238923676312,
+    0.09324024524539709,
+    -0.12510012816637756,
+    -0.06630658619105817,
+    0.11119609288871288,
+    -0.16596424281597139,
+    0.08907460342161358,
+    0.19401996767846869,
+    0.01849436295451596,
+    0.14532765592448413,
+    0.08469446934759615,
+    -0.023078771238215272,
+    0.07181636142777278,
+    -0.15694392705336213,
+    0.026453089481219653,
+    0.14835394602268934,
+    -0.050624337047338486,
+    0.08605103651061653,
+    0.25574559848755596,
+    0.18944694046513177,
+    -0.335114658344537,
+    -0.10531004574149846,
+    -0.13781959619373083,
+    0.06695765908807516,
+    -0.43373020405415447,
+    -0.00021649692207574567,
+    -0.06944741196930408,
+    0.18373148031532766,
+    -0.18261400833725927,
+    -0.07242659293115139,
+    0.07962212040729355,
+    -0.054722306877374643,
+    -0.08814518945291638,
+    -0.18172899140045046,
+    0.07253306582570077,
+    0.12834971025440609,
+    -0.07147813141345978,
+    -0.03303390946239235,
+    -0.020850191079080108,
+    0.03093239173758775,
+    -0.17062115278095008,
+    -0.2042961787432432,
+    0.0001160103827714809,
+    -0.05252801310271025,
+    -0.7858508661389351,
+    -0.2091302715241909,
+    0.215912102162838,
+    0.1682404987514019,
+    -0.004957896983250976,
+    0.08353579475078732,
+    0.07429065436590462,
+    -0.2838975650956854,
+    -0.060729915578849616,
+    0.015331120043992993,
+    -0.3538943402469158,
+    0.06603590287268161,
+    -0.3598479990148917,
+    0.04949475321918727,
+    -0.1802994295954704,
+    0.8948932841420174,
+    0.06269775629043578,
+    0.14551597367972133,
+    -0.024429614841938015,
+    -0.18672503978013993,
+    0.0038162305951118525,
+    0.06440221983939409,
+    0.22779810093343258,
+    0.0648292437195778,
+    -0.06520362980663777,
+    -0.1480596019886434,
+    -0.07716014515608549,
+    0.19149241223931313,
+    -0.00472725033760071,
+    -0.15999614455504343,
+    0.05745576778426767,
+    -0.1102717611938715,
+    0.10388381499797106,
+    0.060168941505253315,
+    -0.059192010387778285,
+    0.16045401743613183,
+    -0.08206549435853958,
+    -0.00931916926056147,
+    0.07603645194321872,
+    -0.015994349215179678,
+    0.06288723759353161,
+    0.035409542825073,
+    -0.02822249522432685,
+    0.1511568885296583,
+    0.025192760489881047,
+    -0.45415958352386954,
+    -0.2641012085601687,
+    -0.09631151705980302,
+    0.05240553788607939,
+    -0.038502784445881844,
+    -0.1828939441591501,
+    0.08149223010987043,
+    -0.16405740920454265,
+    -0.0005170568823814475,
+    -0.02928877165541053,
+    0.04517688824562356,
+    0.006311735883355152,
+    0.07976922886446118,
+    0.10996842151507735,
+    -0.08294843146577477,
+    -0.04048346038907766,
+    0.20001366436481477,
+    -0.17603044249117372,
+    0.030919674783945097,
+    -0.45755103826522825,
+    0.27328743394464255,
+    0.14530749581754207,
+    0.37551659494638445,
+    -0.08780852369964123,
+    0.0881434208364226,
+    0.1102366984821856,
+    -0.10324715869501233,
+    -0.0326073732227087,
+    0.11423155306838453,
+    0.08385749866720289,
+    -0.044914178038015964,
+    0.03559637144207953,
+    0.15771918892860412,
+    0.1590451302938163,
+    0.2501667616888881,
+    -0.4137455578893423,
+    0.0505435885861516,
+    -0.23191697373986245,
+    0.0287872213870287,
+    0.003149333596229556,
+    0.10368789061903953,
+    -0.010588538646697995,
+    -0.0879323348402977,
+    -0.07506629079580307,
+    -0.5269412443041801,
+    0.22431598380208015,
+    0.3359779428690672,
+    0.0032832570374011962,
+    -0.050378158874809745,
+    0.3839163174852729,
+    0.028349736475502138,
+    -0.10097187757492063,
+    -0.11896620839834213,
+    -0.1368182884529233,
+    0.04117071777582168,
+    0.09757037162780761,
+    0.30170601047575474,
+    0.42509459182620046,
+    -0.031749222427606555,
+    -0.450509675219655,
+    -0.10203840397298336,
+    0.11731623336672783,
+    -0.14807355552911758,
+    -0.3516409188508987,
+    -0.014435897022485725,
+    -0.2427325451746583,
+    0.06217130366712809,
+    -0.38942934162914755,
+    -0.23018259108066563,
+    -0.046737963333725915,
+    0.21478279903531072,
+    -0.15159097351133824,
+    -0.6199001990258693,
+    0.11925626099109649,
+    -0.11402976419776678,
+    0.007391047988494388,
+    -0.00794237181544305,
+    0.3376276850700378,
+    -0.469353917054832,
+    -0.39055002434179187,
+    -0.5357521926518529,
+    -0.2898576606065035,
+    -0.07800486553460359,
+    -0.3499526508152485,
+    -0.09789482404012233,
+    -0.29802559399977324,
+    -0.0005799770355224831,
+    -0.16338682733476162,
+    0.3528595224022865,
+    -0.43563686497509474,
+    0.09255755357444287,
+    0.14223229270428417,
+    0.0687978647649288,
+    9.937696158886233e-05,
+    0.370262223854661,
+    -0.6324451595544816,
+    0.2665458579082042,
+    -0.1399991037324071,
+    0.12931404411792755,
+    0.16657713875174524,
+    -0.12588859747629613,
+    0.11505986778065562,
+    0.04677194319665433,
+    -0.3566686304286122,
+    -0.24695453979074955,
+    0.2476239986717701,
+    -0.5334096863865851,
+    -0.07400115504860877,
+    0.12062631538137794,
+    0.04516074173152446,
+    -0.052435807511210436,
+    0.3395687915384769,
+    -0.35318856965750456,
+    0.09023026153445243,
+    -0.33012407943606376,
+    0.1426998670678586,
+    0.16262518018484115,
+    0.1251251042820513,
+    0.46874684616923334,
+    -0.15137760601937772,
+    0.068039158731699,
+    -0.30281599573791024,
+    -0.2254273697733879,
+    0.27405579378828404,
+    0.17136543877422808,
+    0.10630016443319619,
+    0.057858095318079,
+    0.11517375223338605,
+    -0.15762499999254942,
+    -0.29063242860138416,
+    0.20158795565366744,
+    -0.29120013369247316,
+    -0.24734700098633766,
+    -0.11135954931378364,
+    0.0172116317320615,
+    -0.31362133994698527,
+    -0.22704790353309365,
+    0.04125624597072601,
+    0.2366799856070429,
+    0.1992180491797626,
+    0.2684565844014287,
+    0.2055516693741083,
+    -0.33117692880332467,
+    -0.031874293368309735,
+    0.38305166363716125,
+    -0.29617225378751755,
+    -0.04308449327945709,
+    -0.5536482103168965,
+    0.5467873688321561,
+    0.17487224414944647,
+    0.4601214074995369,
+    -0.21070712432265282,
+    0.14032430904917417,
+    0.04221225241199136,
+    -0.1693667344748974,
+    -0.05280606932938099,
+    0.19504989907145498,
+    0.5787762992084027,
+    -0.0508410669863224,
+    0.3892222262918949,
+    0.3818872168660164,
+    0.1461108922958374,
+    0.2555951376445591,
+    -0.23311877846717832,
+    0.03256144672632219,
+    0.08528051348403096
+  ]
+}