Spaces:
Running
Running
mireiafarrus
committed on
Commit
·
af7ac2b
1
Parent(s):
e6ec368
tacotron2 and hifigan upload
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- Decoder.py +380 -0
- Encoder.py +75 -0
- GST.py +370 -0
- LICENSE +29 -0
- MAIN.py +82 -0
- Postnet.py +54 -0
- Tacotron2.py +114 -0
- __init__.py +1 -0
- audio_processing.py +95 -0
- background_images/wallpaper_test.jpg +0 -0
- background_images/wallpaper_test_1_crop.jpg +0 -0
- background_images/wallpaper_test_1_crop_2.jpg +0 -0
- background_images/wallpaper_test_1_crop_3.jpg +0 -0
- background_images/wallpaper_test_2.jpg +0 -0
- background_images/wallpaper_test_2_crop.jpg +0 -0
- background_images/wallpaper_test_mod.jpg +0 -0
- background_images/wallpaper_test_mod_2.jpg +0 -0
- data_preparation.py +104 -0
- distributed.py +180 -0
- examples_taco2.py +7 -0
- filelists/ljs_audio_text_test_filelist.txt +500 -0
- filelists/ljs_audio_text_train_filelist.txt +0 -0
- filelists/ljs_audio_text_val_filelist.txt +100 -0
- fp16_optimizer.py +385 -0
- hifigan/LICENSE +21 -0
- hifigan/LJSpeech-1.1/training.txt +0 -0
- hifigan/LJSpeech-1.1/validation.txt +150 -0
- hifigan/README.md +105 -0
- hifigan/__pycache__/env.cpython-310.pyc +0 -0
- hifigan/__pycache__/models.cpython-310.pyc +0 -0
- hifigan/__pycache__/utils.cpython-310.pyc +0 -0
- hifigan/config_v1.json +37 -0
- hifigan/env.py +15 -0
- hifigan/inference.py +95 -0
- hifigan/meldataset.py +168 -0
- hifigan/models.py +283 -0
- hifigan/train.py +271 -0
- hifigan/utils.py +58 -0
- hyper_parameters.py +70 -0
- logger.py +47 -0
- loss_function.py +25 -0
- loss_scaler.py +79 -0
- models/checkpoint_78000.model +3 -0
- models/config.json +38 -0
- models/generator_v1 +3 -0
- models/nvidia_tacotron2_LJ11_epoch6400.pt +3 -0
- multiproc.py +23 -0
- nn_layers.py +107 -0
- plotting_utils.py +83 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
models/generator_v1 filter=lfs diff=lfs merge=lfs -text
|
Decoder.py
ADDED
@@ -0,0 +1,380 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch.autograd import Variable
|
3 |
+
from torch import nn
|
4 |
+
from torch.nn import functional as F
|
5 |
+
from nn_layers import linear_module, location_layer
|
6 |
+
from utils import get_mask_from_lengths
|
7 |
+
|
8 |
+
torch.manual_seed(1234)
|
9 |
+
|
10 |
+
class AttentionNet(nn.Module):
    """Additive attention that also looks at previous attention weights.

    The score for every encoder position is
    ``v(tanh(W_q * query + W_loc * location_features + W_m * memory))``;
    the scores are soft-maxed over the encoder axis and used to average the
    encoder outputs into a context vector.

    The original author noted 1024, 512, 128, 32, 31 as the values used for
    the five constructor arguments.
    """

    def __init__(self, attention_rnn_dim, embedding_dim, attention_dim,
                 attention_location_n_filters, attention_location_kernel_size):
        """
        PARAMS
        ------
        attention_rnn_dim: size of the attention RNN hidden state (the query)
        embedding_dim: size of each encoder output vector (the memory)
        attention_dim: size of the shared space in which scores are computed
        attention_location_n_filters: forwarded to ``location_layer``
        attention_location_kernel_size: forwarded to ``location_layer``
        """
        super().__init__()
        # NOTE: layers are created in this exact order so that seeded weight
        # initialisation and state_dict ordering stay stable.
        # Projects the query into the attention space.
        self.query_layer = linear_module(
            attention_rnn_dim, attention_dim, bias=False, w_init_gain='tanh')
        # Projects the encoder outputs into the attention space.
        self.memory_layer = linear_module(
            embedding_dim, attention_dim, bias=False, w_init_gain='tanh')
        # Collapses each attention-space vector into one scalar score.
        self.v = linear_module(attention_dim, 1, bias=False)
        # Turns previous/cumulative attention weights into location features.
        self.location_layer = location_layer(
            attention_location_n_filters,
            attention_location_kernel_size,
            attention_dim)
        # Padded positions receive this score, i.e. zero weight after softmax.
        self.score_mask_value = float("-inf")

    def get_alignment_energies(self, query, processed_memory,
                               attention_weights_cat):
        """Compute the un-normalised attention score of every encoder step.

        PARAMS
        ------
        query: attention RNN hidden state; a length-1 axis is inserted at
            dim 1 so it broadcasts over the encoder positions
        processed_memory: encoder outputs already passed through
            ``memory_layer`` (B, T_in, attention_dim)
        attention_weights_cat: previous and cumulative attention weights
            stacked on dim 1 (B, 2, max_time)

        RETURNS
        -------
        energies: one score per encoder position (batch, max_time)
        """
        query_projection = self.query_layer(query.unsqueeze(1))
        # assumes location_layer returns (B, max_time, attention_dim) so the
        # three terms can be summed -- TODO confirm against nn_layers
        location_projection = self.location_layer(attention_weights_cat)
        combined = query_projection + location_projection + processed_memory
        scores = self.v(torch.tanh(combined))
        # Drop the trailing axis of size one produced by ``v``.
        return scores.squeeze(-1)

    def forward(self, attention_hidden_state, memory, processed_memory,
                attention_weights_cat, mask):
        """Return the attention context and the new attention weights.

        PARAMS
        ------
        attention_hidden_state: last output of the attention RNN
        memory: encoder outputs
        processed_memory: encoder outputs projected by ``memory_layer``
        attention_weights_cat: previous and cumulative attention weights
        mask: boolean mask that is True on padded positions, or None

        RETURNS
        -------
        attention_context: weighted average of ``memory`` over dim 1
        attention_weights: softmax-normalised weights (batch, max_time)
        """
        scores = self.get_alignment_energies(
            attention_hidden_state, processed_memory, attention_weights_cat)

        if mask is not None:
            # In-place fill on the raw data, exactly as the original did.
            scores.data.masked_fill_(mask, self.score_mask_value)

        weights = F.softmax(scores, dim=1)
        # (B, 1, T_in) x (B, T_in, E) -> (B, 1, E) -> (B, E)
        context = torch.bmm(weights.unsqueeze(1), memory).squeeze(1)
        return context, weights
|
74 |
+
|
75 |
+
|
76 |
+
class Prenet(nn.Module):
    """Stack of bias-free linear layers, each followed by ReLU and dropout."""

    def __init__(self, in_dim, sizes):
        """
        PARAMS
        ------
        in_dim: width of the input vectors
        sizes: list with the output width of every layer, in order
        """
        super().__init__()
        # Layer i reads what layer i-1 produced; the first one reads in_dim.
        input_widths = [in_dim] + sizes[:-1]
        projections = []
        for fan_in, fan_out in zip(input_widths, sizes):
            projections.append(linear_module(fan_in, fan_out, bias=False))
        self.layers = nn.ModuleList(projections)

    def forward(self, x):
        """Run ``x`` through every layer and return the last activation.

        Dropout (p=0.5) is applied with ``training=True`` hard-coded, so it
        stays active even when the module is in eval mode.
        """
        hidden = x
        for layer in self.layers:
            activated = F.relu(layer(hidden))
            hidden = F.dropout(activated, p=0.5, training=True)
        return hidden
|
89 |
+
|
90 |
+
|
91 |
+
class Decoder(nn.Module):
    """Autoregressive mel-spectrogram decoder with location-aware attention.

    Every step feeds the previous frame (through the prenet) and the previous
    attention context to an attention LSTM cell, recomputes the attention,
    runs a decoder LSTM cell, and projects the result to a mel frame and a
    stop ("gate") logit.  The recurrent state is kept on ``self`` between
    calls to :meth:`decode`, so one instance must not decode two batches
    concurrently.
    """

    def __init__(self, tacotron_hyperparams):
        """
        PARAMS
        ------
        tacotron_hyperparams: dict read for the keys 'n_mel_channels',
            'number_frames_step', 'encoder_embedding_dim',
            'attention_rnn_dim', 'decoder_rnn_dim', 'prenet_dim',
            'max_decoder_steps', 'gate_threshold', 'p_attention_dropout',
            'p_decoder_dropout', 'attention_dim',
            'attention_location_n_filters' and
            'attention_location_kernel_size'
        """
        super(Decoder, self).__init__()
        self.n_mel_channels = tacotron_hyperparams['n_mel_channels']
        self.n_frames_per_step = tacotron_hyperparams['number_frames_step']
        self.encoder_embedding_dim = tacotron_hyperparams['encoder_embedding_dim']
        self.attention_rnn_dim = tacotron_hyperparams['attention_rnn_dim']
        self.decoder_rnn_dim = tacotron_hyperparams['decoder_rnn_dim']
        self.prenet_dim = tacotron_hyperparams['prenet_dim']
        # Hard limit on the number of steps produced by ``inference``.
        self.max_decoder_steps = tacotron_hyperparams['max_decoder_steps']
        # ``inference`` stops once sigmoid(gate) exceeds this value.
        self.gate_threshold = tacotron_hyperparams['gate_threshold']
        self.p_attention_dropout = tacotron_hyperparams['p_attention_dropout']
        self.p_decoder_dropout = tacotron_hyperparams['p_decoder_dropout']

        # NOTE: sub-modules are created in this exact order so that seeded
        # initialisation and state_dict ordering stay stable.
        # Two equally sized layers applied to the previous (group of) frame(s).
        self.prenet = Prenet(
            tacotron_hyperparams['n_mel_channels'] * tacotron_hyperparams['number_frames_step'],
            [tacotron_hyperparams['prenet_dim'], tacotron_hyperparams['prenet_dim']])
        # Input: prenet output concatenated with the attention context.
        self.attention_rnn = nn.LSTMCell(
            tacotron_hyperparams['prenet_dim'] + tacotron_hyperparams['encoder_embedding_dim'],
            tacotron_hyperparams['attention_rnn_dim'])
        # Produces the attention context and the attention weights.
        self.attention_layer = AttentionNet(
            tacotron_hyperparams['attention_rnn_dim'], tacotron_hyperparams['encoder_embedding_dim'],
            tacotron_hyperparams['attention_dim'], tacotron_hyperparams['attention_location_n_filters'],
            tacotron_hyperparams['attention_location_kernel_size'])
        # Input: attention RNN hidden state concatenated with the attention
        # context.  The original passed ``1`` positionally for ``bias``.
        self.decoder_rnn = nn.LSTMCell(
            tacotron_hyperparams['attention_rnn_dim'] + tacotron_hyperparams['encoder_embedding_dim'],
            tacotron_hyperparams['decoder_rnn_dim'], bias=True)
        # Input: decoder RNN hidden state concatenated with the attention
        # context; output: one group of ``number_frames_step`` mel frames.
        self.linear_projection = linear_module(
            tacotron_hyperparams['decoder_rnn_dim'] + tacotron_hyperparams['encoder_embedding_dim'],
            tacotron_hyperparams['n_mel_channels'] * tacotron_hyperparams['number_frames_step'])
        # Same input as the projection; output: a single stop logit.
        self.gate_layer = linear_module(
            tacotron_hyperparams['decoder_rnn_dim'] + tacotron_hyperparams['encoder_embedding_dim'], 1,
            bias=True, w_init_gain='sigmoid')

    def get_go_frame(self, memory):
        """ Gets all zeros frames to use as first decoder input
        PARAMS
        ------
        memory: encoder outputs; only the batch size, dtype and device are used

        RETURNS
        -------
        decoder_input: all zeros frames (B, n_mel_channels * n_frames_per_step)
        """
        batch_size = memory.size(0)
        return memory.new_zeros(
            batch_size, self.n_mel_channels * self.n_frames_per_step)

    def initialize_decoder_states(self, memory, mask):
        """ Initializes attention rnn states, decoder rnn states, attention
        weights, attention cumulative weights, attention context, stores memory
        and stores processed memory
        PARAMS
        ------
        memory: Encoder outputs
        mask: Mask for padded data if training, expects None for inference
        """
        batch_size = memory.size(0)
        max_time = memory.size(1)

        # All recurrent state starts at zero, on memory's dtype and device.
        self.attention_hidden = memory.new_zeros(batch_size, self.attention_rnn_dim)
        self.attention_cell = memory.new_zeros(batch_size, self.attention_rnn_dim)

        self.decoder_hidden = memory.new_zeros(batch_size, self.decoder_rnn_dim)
        self.decoder_cell = memory.new_zeros(batch_size, self.decoder_rnn_dim)

        self.attention_weights = memory.new_zeros(batch_size, max_time)
        self.attention_weights_cum = memory.new_zeros(batch_size, max_time)
        self.attention_context = memory.new_zeros(batch_size, self.encoder_embedding_dim)

        self.memory = memory
        # The memory projection does not change between steps: compute it once.
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask

    def parse_decoder_inputs(self, decoder_inputs):
        """ Prepares decoder inputs, i.e. mel outputs
        PARAMS
        ------
        decoder_inputs: inputs used for teacher-forced training, i.e. mel-specs
            (B, n_mel_channels, T_out)

        RETURNS
        -------
        inputs: processed decoder inputs
            (T_out / n_frames_per_step, B, n_mel_channels * n_frames_per_step)
        """
        # (B, n_mel_channels, T_out) -> (B, T_out, n_mel_channels)
        decoder_inputs = decoder_inputs.transpose(1, 2)
        # Group consecutive frames when decoding several frames per step.
        # ``reshape`` is required here: the transposed tensor is not
        # contiguous, so ``view`` raises whenever n_frames_per_step > 1.
        decoder_inputs = decoder_inputs.reshape(
            decoder_inputs.size(0),
            decoder_inputs.size(1) // self.n_frames_per_step, -1)
        # (B, steps, features) -> (steps, B, features)
        decoder_inputs = decoder_inputs.transpose(0, 1)
        return decoder_inputs

    def parse_decoder_outputs(self, mel_outputs, gate_outputs, alignments):
        """ Prepares decoder outputs for output
        PARAMS
        ------
        mel_outputs: list with one (B, n_mel_channels * n_frames_per_step)
            tensor per decoder step
        gate_outputs: list with the gate output energies of every step
        alignments: list with the attention weights of every step

        RETURNS
        -------
        mel_outputs: (B, n_mel_channels, T_out)
        gate_outputs: gate output energies, batch first
        alignments: attention weights, batch first
        """
        # (steps, B, T_in) -> (B, steps, T_in)
        alignments = torch.stack(alignments).transpose(0, 1)
        # (steps, B, ...) -> (B, steps, ...)
        gate_outputs = torch.stack(gate_outputs).transpose(0, 1)
        gate_outputs = gate_outputs.contiguous()
        # (steps, B, features) -> (B, steps, features)
        mel_outputs = torch.stack(mel_outputs).transpose(0, 1).contiguous()
        # decouple frames per step
        mel_outputs = mel_outputs.view(
            mel_outputs.size(0), -1, self.n_mel_channels)
        # (B, T_out, n_mel_channels) -> (B, n_mel_channels, T_out)
        mel_outputs = mel_outputs.transpose(1, 2)

        return mel_outputs, gate_outputs, alignments

    def decode(self, decoder_input):
        """ Decoder step using stored states, attention and memory
        PARAMS
        ------
        decoder_input: prenet output for the previous mel frame(s)

        RETURNS
        -------
        mel_output: projected mel frame(s) for this step
        gate_output: gate output energies
        attention_weights: attention weights computed in this step
        """
        # Attention RNN input: prenet output + attention context of the
        # previous step.
        cell_input = torch.cat((decoder_input, self.attention_context), -1)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            cell_input, (self.attention_hidden, self.attention_cell))
        self.attention_hidden = F.dropout(
            self.attention_hidden, self.p_attention_dropout, self.training)
        self.attention_cell = F.dropout(
            self.attention_cell, self.p_attention_dropout, self.training)

        # Location features are built from the previous weights and from the
        # sum of all weights so far.
        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1),
             self.attention_weights_cum.unsqueeze(1)), dim=1)
        self.attention_context, self.attention_weights = self.attention_layer(
            self.attention_hidden, self.memory, self.processed_memory,
            attention_weights_cat, self.mask)

        # Accumulate the new weights for the next step.
        self.attention_weights_cum += self.attention_weights

        # Decoder RNN input: new attention hidden state + new context.
        decoder_input = torch.cat(
            (self.attention_hidden, self.attention_context), -1)
        self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
            decoder_input, (self.decoder_hidden, self.decoder_cell))
        self.decoder_hidden = F.dropout(
            self.decoder_hidden, self.p_decoder_dropout, self.training)
        self.decoder_cell = F.dropout(
            self.decoder_cell, self.p_decoder_dropout, self.training)

        decoder_hidden_attention_context = torch.cat(
            (self.decoder_hidden, self.attention_context), dim=1)
        decoder_output = self.linear_projection(decoder_hidden_attention_context)
        gate_prediction = self.gate_layer(decoder_hidden_attention_context)

        return decoder_output, gate_prediction, self.attention_weights

    def forward(self, memory, decoder_inputs, memory_lengths):
        """ Decoder forward pass for training
        PARAMS
        ------
        memory: Encoder outputs
        decoder_inputs: Decoder inputs for teacher forcing. i.e. mel-specs
        memory_lengths: Encoder output lengths for attention masking.

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        # Teacher forcing: step i is fed the reference frame i-1; step 0 is
        # fed the all-zeros go frame.
        go_frame = self.get_go_frame(memory).unsqueeze(0)
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((go_frame, decoder_inputs), dim=0)
        decoder_inputs = self.prenet(decoder_inputs)

        # The attention mask must be True on padded encoder positions, hence
        # the negation of the helper's result.
        self.initialize_decoder_states(
            memory, mask=~get_mask_from_lengths(memory_lengths))

        mel_outputs, gate_outputs, alignments = [], [], []
        # The last reference frame is never fed back: nothing follows it.
        for step in range(decoder_inputs.size(0) - 1):
            mel_output, gate_output, attention_weights = self.decode(
                decoder_inputs[step])
            mel_outputs.append(mel_output.squeeze(1))
            # squeeze(1), not squeeze(): a bare squeeze() also drops the batch
            # axis when B == 1, which breaks the transpose in
            # parse_decoder_outputs.
            gate_outputs.append(gate_output.squeeze(1))
            alignments.append(attention_weights)

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)

        return mel_outputs, gate_outputs, alignments

    def inference(self, memory):
        """ Decoder inference
        PARAMS
        ------
        memory: Encoder outputs

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        decoder_input = self.get_go_frame(memory)

        self.initialize_decoder_states(memory, mask=None)

        mel_outputs, gate_outputs, alignments = [], [], []
        while True:
            decoder_input = self.prenet(decoder_input)
            mel_output, gate_output, alignment = self.decode(decoder_input)

            mel_outputs += [mel_output.squeeze(1)]
            gate_outputs += [gate_output]
            alignments += [alignment]

            # The truth value of this comparison is only defined for a
            # single-element tensor, i.e. one utterance at a time.
            if torch.sigmoid(gate_output.data) > self.gate_threshold:
                break
            elif len(mel_outputs) == self.max_decoder_steps:
                print("Warning! Reached max decoder steps")
                break

            # Feed the predicted frame back as the next input.
            decoder_input = mel_output

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)

        return mel_outputs, gate_outputs, alignments
|
Encoder.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch.nn import functional as F
|
4 |
+
from nn_layers import convolutional_module
|
5 |
+
|
6 |
+
torch.manual_seed(1234)
|
7 |
+
|
8 |
+
class Encoder(nn.Module):
    """Text encoder: a stack of 1-D convolutions and one bidirectional LSTM.

    Every convolution is followed by batch normalisation, ReLU and dropout
    (p=0.5).  The result is transposed to batch-first layout and passed
    through a single bidirectional LSTM layer.
    """

    def __init__(self, encoder_params):
        """
        PARAMS
        ------
        encoder_params: dict read for the keys 'encoder_convs',
            'symbols_embedding_length', 'conv_kernel_size', 'conv_stride',
            'conv_dilation' and 'w_init_gain'
        """
        super().__init__()
        channels = encoder_params['symbols_embedding_length']

        # One (convolution, batch norm) pair per requested layer.  Channel
        # count is the same on both sides; padding is (kernel - 1) / 2.
        # See https://pytorch.org/docs/stable/nn.html#conv1d
        conv_blocks = []
        for _ in range(encoder_params['encoder_convs']):
            convolution = convolutional_module(
                channels,
                channels,
                kernel_size=encoder_params['conv_kernel_size'],
                stride=encoder_params['conv_stride'],
                padding=int((encoder_params['conv_kernel_size'] - 1) / 2),
                dilation=encoder_params['conv_dilation'],
                w_init_gain=encoder_params['w_init_gain'])
            conv_blocks.append(
                nn.Sequential(convolution, nn.BatchNorm1d(channels)))
        self.stack_conv = nn.ModuleList(conv_blocks)

        # Open question left by the original author: could the bidirectional
        # LSTM be registered together with the convolutional stack?

        # Single bidirectional layer; each direction gets half the channels so
        # the concatenated output is ``channels`` wide again.
        self.bi_lstm = nn.LSTM(channels, int(channels / 2), 1,
                               batch_first=True, bidirectional=True)

    def _run_conv_stack(self, features):
        """Apply conv -> batch norm -> ReLU -> dropout for every block."""
        for block in self.stack_conv:
            features = F.dropout(F.relu(block(features)), 0.5, self.training)
        return features

    def forward(self, input_sequences, input_lengths):
        """Encode a padded batch, skipping the padding inside the LSTM.

        PARAMS
        ------
        input_sequences: embedded input symbols, channels on dim 1
        input_lengths: tensor with the true length of every sequence;
            ``pack_padded_sequence`` is called with its default settings, so
            the batch is expected to be sorted by decreasing length

        RETURNS
        -------
        outputs: LSTM outputs, batch first, zero-padded
            [N, Max_seq_length, E_length]
        """
        # Channels-first -> batch-first sequence layout for the LSTM.
        features = self._run_conv_stack(input_sequences).transpose(1, 2)
        # Open question left by the original author: do the convolutions keep
        # the sequence length unchanged?
        lengths = input_lengths.cpu().numpy()
        packed = nn.utils.rnn.pack_padded_sequence(
            features, lengths, batch_first=True)
        self.bi_lstm.flatten_parameters()
        packed_outputs, _ = self.bi_lstm(packed)
        # Back to a regular padded tensor.
        outputs, _ = nn.utils.rnn.pad_packed_sequence(
            packed_outputs, batch_first=True)
        return outputs

    def inference(self, x):
        """Encode ``x`` without packing (no length information is used).

        RETURNS
        -------
        outputs: LSTM outputs, batch first
        """
        features = self._run_conv_stack(x).transpose(1, 2)
        self.bi_lstm.flatten_parameters()
        outputs, _ = self.bi_lstm(features)
        return outputs
|
GST.py
ADDED
@@ -0,0 +1,370 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn as nn
|
3 |
+
import torch.nn.init as init
|
4 |
+
import torch.nn.functional as F
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
torch.manual_seed(1234)
|
8 |
+
|
9 |
+
|
10 |
+
class GST(nn.Module):
    """Style-token module: a reference encoder followed by a style-token layer.

    Training embeds a reference log-mel spectrogram and converts it to a style
    embedding; inference builds the style embedding directly from scores.
    """

    def __init__(self, hyper_parameters):
        """
        PARAMS
        ------
        hyper_parameters: forwarded unchanged to ``MultiSTL``
        """
        super().__init__()
        self.prosody_extractor = LogMelSpecReferenceEncoder()
        self.stl = MultiSTL(hyper_parameters=hyper_parameters)

    def forward(self, logmel_spec, logmel_lengths):
        """Return the style embedding and token scores for a reference.

        PARAMS
        ------
        logmel_spec: reference log-mel spectrograms
        logmel_lengths: lengths of the reference spectrograms

        RETURNS
        -------
        style_embed: first output of the style-token layer
        gst_scores: second output of the style-token layer
        """
        # the original comment gives the shape as [N, 512] -- TODO confirm
        # against LogMelSpecReferenceEncoder
        reference_embedding = self.prosody_extractor(logmel_spec, logmel_lengths)
        token_layer_outputs = self.stl(reference_embedding)
        style_embed, gst_scores = token_layer_outputs
        return style_embed, gst_scores

    def inference(self, scores):
        """Return the style embedding ``MultiSTL.inference`` builds for ``scores``.

        NOTE(review): the expected shape of ``scores`` is still undefined
        (flagged by the original author).
        """
        return self.stl.inference(scores=scores)
|
28 |
+
|
29 |
+
|
30 |
+
class PitchContourEncoder(nn.Module):
    """Encode a per-frame feature map into one fixed-size vector.

    A stack of 2-D convolutions (batch norm, ReLU and dropout after each) is
    applied, the channel and feature axes are flattened per frame, and the
    resulting sequence is summarised by the final hidden states of a
    bidirectional LSTM.

    NOTE(review): the LSTM input size is hard-coded to 176, so the last entry
    of 'ref_enc_out_channels' times the number of input bins must equal 176.
    The original comment mentions 13 bins, which does not divide 176 --
    confirm the intended configuration.
    """

    def __init__(self, hyper_parameters):
        """
        PARAMS
        ------
        hyper_parameters: dict read for 'ref_enc_out_channels' (list with the
            output channels of every convolution) and
            'seq_ref_enc_filter_size' (kernel size along the time axis of
            every convolution)
        """
        super().__init__()

        out_channels = hyper_parameters['ref_enc_out_channels']
        # The first convolution reads a single input channel.
        channel_chain = [1] + out_channels
        time_kernels = hyper_parameters['seq_ref_enc_filter_size']

        # Each convolution is created and re-initialised before the next one
        # is built, so random-number consumption matches the original order.
        conv_layers = []
        for idx, n_out in enumerate(out_channels):
            time_kernel = time_kernels[idx]
            layer = nn.Conv2d(
                in_channels=channel_chain[idx],
                out_channels=n_out,
                kernel_size=(time_kernel, 3),
                stride=(1, 1),
                # Padding that keeps both spatial sizes for odd kernels.
                padding=(int((time_kernel - 1) / 2), int((3 - 1) / 2)),
                bias=True)
            nn.init.xavier_uniform_(
                layer.weight, gain=torch.nn.init.calculate_gain('linear'))
            conv_layers.append(layer)
        self.convs2D = nn.ModuleList(conv_layers)

        self.bns2D = nn.ModuleList(
            [nn.BatchNorm2d(num_features=n_out) for n_out in out_channels])

        # The LSTM keeps PyTorch's default weight initialisation.
        self.prosody_bi_lstm = nn.LSTM(
            input_size=int(176), hidden_size=int(512 / 2), num_layers=1,
            batch_first=True, bidirectional=True)

    def forward(self, bin_locations):
        """
        PARAMS
        ------
        bin_locations: [N, BIN_SUBAND, LEN_MELSPEC]

        RETURNS
        -------
        encoded_prosody: final hidden states of both LSTM directions,
            concatenated per sample; [N, 512]
        """
        batch = bin_locations.size(0)
        # Add the single input channel and put time before the bins:
        # [N, 1, LEN_MELSPEC, BIN_SUBAND]
        features = bin_locations.unsqueeze(1).transpose(2, 3)

        # ReLU is applied after every convolution; the original author noted
        # that running without it could be tried as well.
        for conv, norm in zip(self.convs2D, self.bns2D):
            normalised = norm(conv(features))
            # [N, Cout, LEN_MELSPEC, BIN_SUBAND]
            features = F.dropout(F.relu(normalised), 0.5, self.training)

        # [N, LEN_MELSPEC, Cout, BIN_SUBAND] -> [N, LEN_MELSPEC, Cout*BIN_SUBAND]
        features = features.transpose(1, 2)
        n_frames = features.size(1)
        features = features.contiguous().view(batch, n_frames, -1)

        # Sequences are deliberately not packed: the original author relies on
        # the encoder coping with the zero padding by itself.
        _, (final_hidden, _) = self.prosody_bi_lstm(features)

        # (directions, N, hidden) -> (N, directions * hidden)
        return final_hidden.transpose(0, 1).contiguous().view(batch, -1)
|
89 |
+
|
90 |
+
|
91 |
+
# DENSE GST Reference Encoder:
|
92 |
+
class ProsodyEncoder(nn.Module):
    """
    Dense GST reference encoder built from two parallel convolution stacks: a 1-D stack applied to the pitch bin
    locations and a 2-D stack applied to the pitch intensities. The per-frame outputs of both stacks are
    concatenated, linearly projected (512 -> 256) and passed through a bidirectional LSTM. The final hidden
    states of both LSTM directions are returned as the prosody embedding.
    """
    def __init__(self, hyper_parameters):
        """
        :param hyper_parameters: mapping providing 'ref_enc_out_channels' (list with the output channels of
            every convolution layer) and 'seq_ref_enc_filter_size' (kernel size along time of every layer).
        """
        super().__init__()

        out_channels = hyper_parameters['ref_enc_out_channels']
        K = len(out_channels)
        filters = [1] + out_channels
        kernel_sizes = hyper_parameters['seq_ref_enc_filter_size']

        convs_1d = []
        convs_2d = []

        # The 1-D and 2-D layers are created and initialised interleaved, layer by layer, so that the random
        # number generator is consumed in the same order as before (reproducible initial weights).
        for i in range(K):
            # Stride 1 with (k - 1) // 2 padding keeps the sequence length unchanged for odd kernel sizes.
            time_padding = (kernel_sizes[i] - 1) // 2

            conv1d_init = nn.Conv1d(in_channels=filters[i], out_channels=filters[i + 1],
                                    kernel_size=kernel_sizes[i], stride=1,
                                    padding=time_padding, bias=True)
            nn.init.xavier_uniform_(conv1d_init.weight, gain=torch.nn.init.calculate_gain('linear'))
            convs_1d.append(conv1d_init)

            # The second kernel axis (size 3) runs over the intensity bins and is padded with 1 on each side.
            conv2d_init = nn.Conv2d(in_channels=filters[i], out_channels=filters[i + 1],
                                    kernel_size=(kernel_sizes[i], 3), stride=(1, 1),
                                    padding=(time_padding, 1), bias=True)
            nn.init.xavier_uniform_(conv2d_init.weight, gain=torch.nn.init.calculate_gain('linear'))
            convs_2d.append(conv2d_init)

        self.convs1D = nn.ModuleList(convs_1d)
        self.convs2D = nn.ModuleList(convs_2d)

        self.bns1D = nn.ModuleList([nn.BatchNorm1d(num_features=channels) for channels in out_channels])
        self.bns2D = nn.ModuleList([nn.BatchNorm2d(num_features=channels) for channels in out_channels])

        # The projection expects 512 concatenated features per frame, i.e. Cout + Cout * bins == 512
        # (for example Cout = 128 with 3 intensity bins).
        self.prosody_linear = nn.Linear(512, 256, bias=True)
        torch.nn.init.xavier_uniform_(self.prosody_linear.weight, gain=torch.nn.init.calculate_gain('linear'))

        # The LSTM keeps PyTorch's default weight initialisation.
        self.prosody_bi_lstm = nn.LSTM(input_size=256, hidden_size=512 // 2, num_layers=1, batch_first=True,
                                       bidirectional=True)

    def forward(self, bin_locations, pitch_intensities):  # [N, LEN_MELSPEC, 1], [N, LEN_MELSPEC, 3]
        """
        Encode the pitch features of a batch into one prosody vector per sample.

        :param bin_locations: pitch bin locations, [N, LEN_MELSPEC, 1].
        :param pitch_intensities: pitch intensities, [N, LEN_MELSPEC, num_intensities].
        :return: final hidden states of both LSTM directions, flattened to [N, 512].
        """
        N = bin_locations.size(0)  # Number of samples
        num_intensities = pitch_intensities.size(2)

        # A single input channel is added for the first Conv2d layer, and channels are moved in front of time
        # for the Conv1d layers.
        pitch_intensities = pitch_intensities.view(N, 1, -1, num_intensities)  # [N, 1, LEN_MELSPEC, bins]
        bin_locations = bin_locations.transpose(1, 2)  # [N, 1, LEN_MELSPEC]

        # Pitch tracking branch: conv -> batch norm -> ReLU -> dropout.
        for conv, bn in zip(self.convs1D, self.bns1D):
            bin_locations = bn(conv(bin_locations))
            bin_locations = F.dropout(F.relu(bin_locations), 0.5, self.training)  # [N, Cout, T]

        # Pitch intensity branch: conv -> batch norm -> ReLU -> dropout.
        for conv2, bn2 in zip(self.convs2D, self.bns2D):
            pitch_intensities = bn2(conv2(pitch_intensities))
            pitch_intensities = F.dropout(F.relu(pitch_intensities), 0.5, self.training)  # [N, Cout, T, bins]

        # Time becomes the second axis in both branches, then the 2-D features are flattened per frame.
        bin_locations = bin_locations.transpose(1, 2)  # [N, T, Cout]
        pitch_intensities = pitch_intensities.transpose(1, 2)  # [N, T, Cout, bins]
        T = pitch_intensities.size(1)
        pitch_intensities = pitch_intensities.contiguous().view(N, T, -1)  # [N, T, Cout*bins]

        # Concatenate the features of both branches per frame.
        pitch_convolved = torch.cat((bin_locations, pitch_intensities), 2)

        # Linear projection followed by tanh and dropout. torch.tanh is used (as in Postnet) instead of the
        # legacy functional form F.tanh; the result is identical.
        projection_pitch_convolved = F.dropout(torch.tanh(self.prosody_linear(pitch_convolved)), 0.5,
                                               self.training)

        # The padded batch is fed to the LSTM directly, without pack_padded_sequence, so trailing zero padding
        # is seen by the encoder as part of every sequence.
        _, (encoded_prosody, cell_state) = self.prosody_bi_lstm(projection_pitch_convolved)

        # [2, N, 256] -> [N, 2, 256] -> [N, 512]
        encoded_prosody = encoded_prosody.transpose(0, 1)
        encoded_prosody = encoded_prosody.contiguous().view(N, -1)

        return encoded_prosody  # should be [N, 512]
|
182 |
+
|
183 |
+
|
184 |
+
class LogMelSpecReferenceEncoder(nn.Module):
    """
    Reference encoder over log-mel spectrograms: six 3x3 convolutions with stride 2 (each followed by batch
    normalisation, ReLU and dropout) and a bidirectional LSTM whose final hidden states of both directions are
    returned as a 512-dimensional vector per sample.
    """
    def __init__(self):

        super().__init__()

        reference_encoder_out_channels = [32, 32, 64, 64, 128, 128]
        K = len(reference_encoder_out_channels)
        filters = [1] + reference_encoder_out_channels
        kernel_size = (3, 3)
        stride = (2, 2)
        padding = (1, 1)

        convs_2d = []

        for i in range(K):
            conv2d_init = nn.Conv2d(in_channels=filters[i], out_channels=filters[i + 1],
                                    kernel_size=kernel_size, stride=stride,
                                    padding=padding, bias=True)
            nn.init.xavier_uniform_(conv2d_init.weight, gain=torch.nn.init.calculate_gain('linear'))
            convs_2d.append(conv2d_init)

        self.convs2D = nn.ModuleList(convs_2d)
        self.bns2D = nn.ModuleList([nn.BatchNorm2d(num_features=channels)
                                    for channels in reference_encoder_out_channels])

        # Size of the mel axis (80 input channels) once the K strided convolutions have been applied.
        out_channels = self.calculate_channels(80, 3, 2, 1, K)

        # The LSTM keeps PyTorch's default weight initialisation.
        self.bi_lstm = nn.LSTM(input_size=reference_encoder_out_channels[-1] * out_channels,
                               hidden_size=512 // 2, num_layers=1, batch_first=True, bidirectional=True)

    def forward(self, logmel_spec, logmel_lengths):  # [N, MEL_CHANNELS, LEN_MELSPEC]
        """
        Encode a batch of log-mel spectrograms into one vector per sample.

        :param logmel_spec: padded log-mel spectrograms, [N, MEL_CHANNELS, LEN_MELSPEC].
        :param logmel_lengths: lengths of the spectrograms. Kept for interface compatibility only: the LSTM runs
            over the padded batch without packing, so the lengths are not used.
        :return: final hidden states of both LSTM directions, flattened to [N, 512].
        """
        N = logmel_spec.size(0)  # Number of samples

        # Add a single input channel for the first Conv2d layer and put time before the mel axis.
        logmel_spec = logmel_spec.unsqueeze(1)
        logmel_spec = logmel_spec.transpose(2, 3)  # [N, 1, LEN_MELSPEC, MEL_CHANNELS]

        # conv -> batch norm -> ReLU -> dropout; every layer halves both the time and the mel axis.
        for conv2, bn2 in zip(self.convs2D, self.bns2D):
            logmel_spec = bn2(conv2(logmel_spec))
            logmel_spec = F.dropout(F.relu(logmel_spec), 0.5, self.training)  # [N, Cout, T', MEL']

        # Flatten channels and the reduced mel axis into one feature vector per remaining time step.
        logmel_spec = logmel_spec.transpose(1, 2)  # [N, T', Cout, MEL']
        T = logmel_spec.size(1)
        logmel_spec = logmel_spec.contiguous().view(N, T, -1)  # [N, T', Cout*MEL']

        # The previous implementation converted `logmel_lengths` to NumPy and derived reduced lengths that were
        # never consumed (packing is disabled). That dead work forced a device-to-host copy on every call and
        # has been removed.
        self.bi_lstm.flatten_parameters()
        outputs, (hidden_states, cell_state) = self.bi_lstm(logmel_spec)

        # [2, N, 256] -> [N, 2, 256] -> [N, 512]
        hidden_states = hidden_states.transpose(0, 1)
        hidden_states = hidden_states.contiguous().view(N, -1)

        return hidden_states

    def calculate_channels(self, L, kernel_size, stride, padding, n_convs):
        """
        Return the size of an axis of length L after `n_convs` convolutions sharing the given kernel size,
        stride and padding (standard convolution output-size formula applied repeatedly).
        """
        for _ in range(n_convs):
            L = (L - kernel_size + 2 * padding) // stride + 1
        return L
|
262 |
+
|
263 |
+
|
264 |
+
# BASIC FORM FOR NOW. NEEDS TO BE EXPANDED TO OUR NEW PROPOSAL
|
265 |
+
class MultiSTL(nn.Module):

    """
    Style token layer: a bank of learnable token embeddings attended to by a multi-head attention module.

    inputs --- [N, E]
    """

    def __init__(self, hyper_parameters):
        """
        :param hyper_parameters: mapping providing 'token_num' (number of style tokens), 'E' (dimension of the
            reference embedding and of the resulting style embedding) and 'num_heads' (attention heads).
        """
        super().__init__()

        # Every token has E // num_heads features, which is also the key dimension of the attention.
        d_q = hyper_parameters['E']
        d_k = hyper_parameters['E'] // hyper_parameters['num_heads']

        # torch.empty replaces the legacy torch.FloatTensor(rows, cols) constructor; the values are set by the
        # Xavier initialisation below.
        self.embed = nn.Parameter(torch.empty(hyper_parameters['token_num'], d_k, dtype=torch.float))

        self.attention = MultiHeadAttention(query_dim=d_q, key_dim=d_k,
                                            num_units=hyper_parameters['E'], num_heads=hyper_parameters['num_heads'])

        init.xavier_uniform_(self.embed, gain=init.calculate_gain('linear'))

    def forward(self, inputs):
        """
        Attend over the style tokens using the reference embedding as query.

        :param inputs: reference embeddings, [N, E].
        :return: tuple (style_embed, gst_scores) exactly as produced by the attention module.
        """
        N = inputs.size(0)  # Number of samples in the batch
        query = inputs.unsqueeze(1)  # [N, 1, E]
        # torch.tanh is used (as in Postnet) instead of the legacy functional form F.tanh.
        keys = torch.tanh(self.embed).unsqueeze(0).expand(N, -1, -1)  # [N, token_num, E // num_heads]
        style_embed, gst_scores = self.attention(query, keys)

        return style_embed, gst_scores

    def inference(self, scores):
        """
        Build a style embedding from token weights imposed by the caller instead of attention scores.

        :param scores: weights over the style tokens, forwarded unchanged to the attention module.
        :return: style embedding produced by `self.attention.inference`.
        """
        keys = torch.tanh(self.embed).unsqueeze(0)  # [1, token_num, E // num_heads]
        style_embed_inference = self.attention.inference(keys, scores=scores)

        return style_embed_inference
|
299 |
+
|
300 |
+
|
301 |
+
class MultiHeadAttention(nn.Module):
    """
    Multi-head dot-product attention in which the values are a linear projection of the keys.

    input:
        query --- [N, T_q, query_dim]   (T_q = 1 when used by the style token layer)
        key --- [N, T_k, key_dim]       (T_k = number of style tokens)
    output:
        out --- [N, T_q, num_units]
    """

    def __init__(self, query_dim, key_dim, num_units, num_heads):
        """
        :param query_dim: feature size of the queries.
        :param key_dim: feature size of the keys (also used to scale the attention scores).
        :param num_units: total number of projected features, shared evenly among the heads.
        :param num_heads: number of attention heads.
        """
        super().__init__()
        self.num_units = num_units
        self.num_heads = num_heads
        self.key_dim = key_dim

        # Bias-free linear projections of queries, keys and values into `num_units` features.
        self.W_query = nn.Linear(in_features=query_dim, out_features=num_units, bias=False)
        self.W_key = nn.Linear(in_features=key_dim, out_features=num_units, bias=False)
        self.W_value = nn.Linear(in_features=key_dim, out_features=num_units, bias=False)

    def _split_heads(self, projected):
        """Split [N, T, num_units] into per-head chunks stacked on a new first axis: [h, N, T, num_units/h]."""
        split_size = self.num_units // self.num_heads  # integer division, without remainder
        return torch.stack(torch.split(projected, split_size, dim=2), dim=0)

    @staticmethod
    def _merge_heads(per_head):
        """Concatenate the heads of [h, N, T, num_units/h] back on the feature axis: [N, T, num_units]."""
        return torch.cat(torch.split(per_head, 1, dim=0), dim=3).squeeze(0)

    def forward(self, query, key):
        """
        :param query: [N, T_q, query_dim].
        :param key: [N, T_k, key_dim]; keys and values are both projected from it.
        :return: tuple (out, scores) with out [N, T_q, num_units] and the softmax attention weights with all
            singleton dimensions squeezed out ([h, N, T_k] when T_q == 1 and N, T_k > 1).
        """
        querys = self._split_heads(self.W_query(query))  # [h, N, T_q, num_units/h]
        keys = self._split_heads(self.W_key(key))  # [h, N, T_k, num_units/h]
        values = self._split_heads(self.W_value(key))  # [h, N, T_k, num_units/h]

        scores = torch.matmul(querys, keys.transpose(2, 3))  # [h, N, T_q, T_k]
        # Scaled by key_dim ** 0.33 (roughly a cube root) rather than the usual square root. The exponent is
        # kept as is because changing it would alter the behaviour of already trained checkpoints.
        scores = scores / (self.key_dim ** 0.33)
        scores = F.softmax(scores, dim=3)  # normalised over the key/token axis

        out = self._merge_heads(torch.matmul(scores, values))  # [N, T_q, num_units]

        # NOTE(review): squeeze() without a dim also drops the batch axis when N == 1; kept unchanged because
        # callers may rely on the current shapes.
        scores = scores.squeeze()

        return out, scores

    def inference(self, key, scores):
        """
        Combine the trained tokens using weights imposed by the caller instead of computed attention scores.

        :param key: token embeddings, [1, T_k, key_dim].
        :param scores: 1-D tensor with T_k weights; the same weights are applied in every head.
        :return: [1, 1, num_units].
        """
        # [T_k] -> [1, 1, 1, T_k] -> [h, 1, 1, T_k]
        scores = scores.unsqueeze(0).unsqueeze(0).unsqueeze(0).expand(self.num_heads, -1, -1, -1)
        values = self._split_heads(self.W_value(key))  # [h, 1, T_k, num_units/h]

        # out = score * V
        out = torch.matmul(scores, values)  # [h, 1, T_q = 1, num_units/h]

        return self._merge_heads(out)  # [1, 1, num_units]
|
LICENSE
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
BSD 3-Clause License
|
2 |
+
|
3 |
+
Copyright (c) 2018, NVIDIA Corporation
|
4 |
+
All rights reserved.
|
5 |
+
|
6 |
+
Redistribution and use in source and binary forms, with or without
|
7 |
+
modification, are permitted provided that the following conditions are met:
|
8 |
+
|
9 |
+
* Redistributions of source code must retain the above copyright notice, this
|
10 |
+
list of conditions and the following disclaimer.
|
11 |
+
|
12 |
+
* Redistributions in binary form must reproduce the above copyright notice,
|
13 |
+
this list of conditions and the following disclaimer in the documentation
|
14 |
+
and/or other materials provided with the distribution.
|
15 |
+
|
16 |
+
* Neither the name of the copyright holder nor the names of its
|
17 |
+
contributors may be used to endorse or promote products derived from
|
18 |
+
this software without specific prior written permission.
|
19 |
+
|
20 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
MAIN.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
######################################################################################################
|
2 |
+
# The main script where the data preparation, training and evaluation happens.
|
3 |
+
######################################################################################################
|
4 |
+
|
5 |
+
import torch
|
6 |
+
from torch.utils.data import DataLoader
|
7 |
+
from torch.utils.data.distributed import DistributedSampler
|
8 |
+
|
9 |
+
from hyper_parameters import tacotron_params
|
10 |
+
from data_preparation import DataPreparation, DataCollate
|
11 |
+
from training import train
|
12 |
+
|
13 |
+
torch.manual_seed(1234)
|
14 |
+
|
15 |
+
|
16 |
+
def _read_filelist(path):
    """Read a '|'-separated filelist and return one list of fields per line."""
    with open(path, encoding='utf-8') as f:
        return [line.strip().split("|") for line in f]


def main():
    """Configure the run, build the datasets and loaders, and launch training."""
    # ---------------------------------------- DEFINING INPUT ARGUMENTS ---------------------------------------------- #

    training_files = 'filelists/ljs_audio_text_train_filelist.txt'
    validation_files = 'filelists/ljs_audio_text_val_filelist.txt'

    output_directory = '/homedtic/apeiro/GST_Tacotron2_ORIGINAL/outputs'
    log_directory = '/tmp/loggs_GST_ORIGINAL/'
    # Set to a checkpoint file to resume training, e.g.
    # '/homedtic/apeiro/GST_Tacotron2_only_pitch_contour_dense_SoftMax/outputs/checkpoint_62000'
    checkpoint_path = None
    warm_start = False
    n_gpus = 1
    rank = 0

    torch.backends.cudnn.enabled = tacotron_params['cudnn_enabled']
    torch.backends.cudnn.benchmark = tacotron_params['cudnn_benchmark']

    print("FP16 Run:", tacotron_params['fp16_run'])
    print("Dynamic Loss Scaling:", tacotron_params['dynamic_loss_scaling'])
    print("Distributed Run:", tacotron_params['distributed_run'])
    print("CUDNN Enabled:", tacotron_params['cudnn_enabled'])
    print("CUDNN Benchmark:", tacotron_params['cudnn_benchmark'])

    # --------------------------------------------- PREPARING DATA --------------------------------------------------- #

    training_audiopaths_and_text = _read_filelist(training_files)
    validation_audiopaths_and_text = _read_filelist(validation_files)

    # Datasets plus the collate function that assembles every batch.
    train_data = DataPreparation(training_audiopaths_and_text, tacotron_params)
    validation_data = DataPreparation(validation_audiopaths_and_text, tacotron_params)
    collate_fn = DataCollate(tacotron_params['number_frames_step'])

    # A DistributedSampler is only used for distributed runs; otherwise the loaders iterate in dataset order.
    distributed = tacotron_params['distributed_run']
    train_sampler = DistributedSampler(train_data) if distributed else None
    val_sampler = DistributedSampler(validation_data) if distributed else None

    loader_options = dict(num_workers=1, shuffle=False, batch_size=tacotron_params['batch_size'],
                          pin_memory=False, drop_last=True, collate_fn=collate_fn)

    # NOTE(review): with shuffle=False and no sampler, non-distributed training reads the filelist in a fixed
    # order every epoch -- confirm this is intended.
    train_loader = DataLoader(train_data, sampler=train_sampler, **loader_options)

    # NOTE(review): this loader is built but not handed to train(), which receives `validation_data` instead.
    validate_loader = DataLoader(validation_data, sampler=val_sampler, **loader_options)

    # ------------------------------------------------- TRAIN -------------------------------------------------------- #

    train(output_directory, log_directory, checkpoint_path, warm_start, n_gpus, rank, hyper_params=tacotron_params,
          valset=validation_data, collate_fn=collate_fn, train_loader=train_loader, group_name="group_name")

    print("Training completed")


if __name__ == '__main__':
    main()
|
Postnet.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch.nn import functional as F
|
4 |
+
from nn_layers import convolutional_module
|
5 |
+
|
6 |
+
torch.manual_seed(1234)
|
7 |
+
|
8 |
+
|
9 |
+
class Postnet(nn.Module):
    """Postnet
        - A stack of 1-d convolutions, each followed by batch normalisation
          (five convolutions with 512 channels and kernel size 5 in the reference configuration)
    """

    def __init__(self, tacotron_hyperparams):
        """
        :param tacotron_hyperparams: mapping providing 'n_mel_channels', 'postnet_embedding_dim',
            'postnet_kernel_size' and 'postnet_n_convolutions'.
        """
        super().__init__()

        n_mel_channels = tacotron_hyperparams['n_mel_channels']
        embedding_dim = tacotron_hyperparams['postnet_embedding_dim']
        kernel_size = tacotron_hyperparams['postnet_kernel_size']
        n_convolutions = tacotron_hyperparams['postnet_n_convolutions']
        # Stride 1 with (k - 1) // 2 padding keeps the number of frames unchanged for odd kernel sizes.
        padding = (kernel_size - 1) // 2

        def conv_bn(in_channels, out_channels, w_init_gain):
            # One postnet layer: convolution followed by batch normalisation over its output channels.
            return nn.Sequential(
                convolutional_module(in_channels, out_channels,
                                     kernel_size=kernel_size, stride=1,
                                     padding=padding,
                                     dilation=1, w_init_gain=w_init_gain),
                nn.BatchNorm1d(out_channels))

        # Same layout (and therefore same state-dict keys) as before: mel -> hidden, (n - 2) hidden -> hidden
        # layers, and a final hidden -> mel layer initialised with a linear gain.
        self.convolutions = nn.ModuleList()
        self.convolutions.append(conv_bn(n_mel_channels, embedding_dim, 'tanh'))
        for _ in range(1, n_convolutions - 1):
            self.convolutions.append(conv_bn(embedding_dim, embedding_dim, 'tanh'))
        self.convolutions.append(conv_bn(embedding_dim, n_mel_channels, 'linear'))

    def forward(self, x):
        """
        Run the input through every layer: tanh + dropout after all layers except the last one, which is
        followed by dropout only.

        :param x: input to the first convolution (n_mel_channels channels).
        :return: output of the last layer (n_mel_channels channels).
        """
        for layer in self.convolutions[:-1]:
            x = F.dropout(torch.tanh(layer(x)), 0.5, self.training)
        x = F.dropout(self.convolutions[-1](x), 0.5, self.training)
        return x
|
Tacotron2.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from math import sqrt
|
2 |
+
|
3 |
+
import torch
|
4 |
+
from torch import nn
|
5 |
+
|
6 |
+
from Encoder import Encoder
|
7 |
+
from Decoder import Decoder
|
8 |
+
from Postnet import Postnet
|
9 |
+
from GST import GST
|
10 |
+
|
11 |
+
from utils import to_gpu, get_mask_from_lengths
|
12 |
+
from fp16_optimizer import fp32_to_fp16, fp16_to_fp32
|
13 |
+
|
14 |
+
torch.manual_seed(1234)
|
15 |
+
|
16 |
+
|
17 |
+
class tacotron_2(nn.Module):
    """Tacotron 2 model with a GST module whose style embedding is added to the encoder outputs."""

    def __init__(self, tacotron_hyperparams):
        """Build the symbol embedding, encoder, decoder, postnet and GST modules from the hyper-parameters."""
        super().__init__()
        hp = tacotron_hyperparams

        self.mask_padding = hp['mask_padding']
        self.fp16_run = hp['fp16_run']
        self.n_mel_channels = hp['n_mel_channels']
        self.n_frames_per_step = hp['number_frames_step']

        self.embedding = nn.Embedding(hp['n_symbols'], hp['symbols_embedding_length'])
        # Uniform initialisation in [-bound, bound], with bound = sqrt(3) * sqrt(2 / (fan_in + fan_out)).
        std = sqrt(2.0 / (hp['n_symbols'] + hp['symbols_embedding_length']))
        bound = sqrt(3.0) * std
        self.embedding.weight.data.uniform_(-bound, bound)

        self.encoder = Encoder(hp)
        self.decoder = Decoder(hp)
        self.postnet = Postnet(hp)
        self.gst = GST(hp)

    def parse_batch(self, batch):
        """
        Move a collated batch to the GPU and split it into model inputs and training targets.

        :param batch: (text, input lengths, mel, gate, output lengths, prosody features), all padded.
        :return: ((text, input_lengths, mel, max_len, output_lengths, prosody), (mel, gate)).
        """
        text, in_lengths, mel, gate, out_lengths, prosody = batch

        # The longest input length is read as a plain int before the lengths are moved to the GPU.
        max_len = int(torch.max(in_lengths.data).item())

        text = to_gpu(text).long()
        in_lengths = to_gpu(in_lengths).long()
        mel = to_gpu(mel).float()
        gate = to_gpu(gate).float()
        out_lengths = to_gpu(out_lengths).long()
        prosody = to_gpu(prosody).float()

        model_inputs = (text, in_lengths, mel, max_len, out_lengths, prosody)
        targets = (mel, gate)
        return model_inputs, targets

    def parse_input(self, inputs):
        """Convert the inputs to half precision for fp16 runs; otherwise return them untouched."""
        if self.fp16_run:
            return fp32_to_fp16(inputs)
        return inputs

    def parse_output(self, outputs, output_lengths=None):
        """
        Mask the padded frames of the outputs (when enabled and lengths are given) and convert back to fp32
        for fp16 runs.

        :param outputs: list whose first three items are mel outputs, post-net mel outputs and gate outputs.
        :param output_lengths: mel lengths used to build the padding mask; no masking when None.
        :return: the same `outputs` object (masked in place), or its fp32 conversion for fp16 runs.
        """
        if self.mask_padding and output_lengths is not None:
            # True where a frame is padding, broadcast over the mel channels: [N, n_mel_channels, T].
            pad_mask = ~get_mask_from_lengths(output_lengths)
            pad_mask = pad_mask.expand(self.n_mel_channels, pad_mask.size(0), pad_mask.size(1))
            pad_mask = pad_mask.permute(1, 0, 2)

            # Filled through .data so that the in-place edits are not recorded by autograd.
            for mel_like in outputs[:2]:
                mel_like.data.masked_fill_(pad_mask, 0.0)
            outputs[2].data.masked_fill_(pad_mask[:, 0, :], 1e3)  # gate energies

        if self.fp16_run:
            outputs = fp16_to_fp32(outputs)

        return outputs

    def forward(self, inputs):
        """
        Training forward pass.

        :param inputs: the model-input tuple produced by `parse_batch`.
        :return: [mel_outputs, mel_outputs_postnet, gate_outputs, alignments, gst_scores] after `parse_output`.
        """
        text, text_lengths, mels, max_len, mel_lengths, prosody = self.parse_input(inputs)
        text_lengths, mel_lengths = text_lengths.data, mel_lengths.data

        embedded = self.embedding(text).transpose(1, 2)
        memory = self.encoder(embedded, text_lengths)

        # The GST style embedding is broadcast over the encoder time steps and added to the encoder outputs.
        style, gst_scores = self.gst(prosody, mel_lengths)  # [N, 512]
        memory = memory + style.expand_as(memory)

        mel_outputs, gate_outputs, alignments = self.decoder(
            memory, mels, memory_lengths=text_lengths)

        # The post-net predicts a residual that is added to the decoder output.
        mel_outputs_postnet = mel_outputs + self.postnet(mel_outputs)

        return self.parse_output(
            [mel_outputs, mel_outputs_postnet, gate_outputs, alignments, gst_scores],
            mel_lengths)

    def inference(self, inputs, gst_scores):  # gst_scores must be a torch tensor
        """
        Synthesis pass in which the style is given by token weights imposed by the caller.

        :param inputs: symbol ids of the text to synthesise.
        :param gst_scores: torch tensor with the weights handed to `self.gst.inference`.
        :return: [mel_outputs, mel_outputs_postnet, gate_outputs, alignments] after `parse_output`.
        """
        text = self.parse_input(inputs)
        embedded = self.embedding(text).transpose(1, 2)
        memory = self.encoder.inference(embedded)

        # GST inference: the imposed style embedding is added to every encoder time step.
        style = self.gst.inference(gst_scores)
        memory = memory + style.expand_as(memory)

        mel_outputs, gate_outputs, alignments = self.decoder.inference(memory)

        mel_outputs_postnet = mel_outputs + self.postnet(mel_outputs)

        return self.parse_output(
            [mel_outputs, mel_outputs_postnet, gate_outputs, alignments])
|
__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from . import hifigan, filelists, models, text
|
audio_processing.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import numpy as np
|
3 |
+
from scipy.signal import get_window
|
4 |
+
import librosa.util as librosa_util
|
5 |
+
|
6 |
+
torch.manual_seed(1234)
|
7 |
+
|
8 |
+
|
9 |
+
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`

    n_frames : int > 0
        The number of analysis frames

    hop_length : int > 0
        The number of samples to advance between frames

    win_length : [optional]
        The length of the window function. When `None`, `n_fft` is used.

    n_fft : int > 0
        The length of each analysis frame.

    dtype : np.dtype
        The data type of the output

    norm : [optional]
        Normalization passed to `librosa.util.normalize` before the window is squared

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    total_length = n_fft + hop_length * (n_frames - 1)
    envelope = np.zeros(total_length, dtype=dtype)

    # Squared (optionally normalised) window, centre-padded to the frame length.
    squared_window = get_window(window, win_length, fftbins=True)
    squared_window = librosa_util.normalize(squared_window, norm=norm)**2
    squared_window = librosa_util.pad_center(squared_window, size=n_fft)

    # Overlap-add one squared window per frame, clipped at the end of the envelope.
    for frame in range(n_frames):
        start = frame * hop_length
        stop = min(total_length, start + n_fft)
        usable = max(0, min(n_fft, total_length - start))
        envelope[start:stop] += squared_window[:usable]
    return envelope
|
59 |
+
|
60 |
+
|
61 |
+
def griffin_lim(magnitudes, stft_fn, n_iters=30):
    """
    Estimate a signal from spectrogram magnitudes: start from a random phase and repeatedly re-estimate the
    phase from the reconstructed signal while keeping the given magnitudes.

    PARAMS
    ------
    magnitudes: spectrogram magnitudes
    stft_fn: STFT class with transform (STFT) and inverse (ISTFT) methods
    n_iters: number of phase re-estimation iterations

    RETURNS
    -------
    signal: output of `stft_fn.inverse` with dimension 1 squeezed
    """

    # Random initial phase in (-pi, pi], with the same shape as the magnitudes.
    angles = np.angle(np.exp(2j * np.pi * np.random.rand(*magnitudes.size())))
    angles = angles.astype(np.float32)
    # torch.autograd.Variable is a deprecated no-op wrapper, so a plain tensor is used. It is moved to the
    # device of `magnitudes` so that the first inverse transform also works for non-CPU inputs.
    angles = torch.from_numpy(angles).to(magnitudes.device)
    signal = stft_fn.inverse(magnitudes, angles).squeeze(1)

    for _ in range(n_iters):
        _, angles = stft_fn.transform(signal)
        signal = stft_fn.inverse(magnitudes, angles).squeeze(1)
    return signal
|
78 |
+
|
79 |
+
|
80 |
+
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """
    Log-compress ``x`` after flooring it at ``clip_val``.

    PARAMS
    ------
    x: tensor to compress
    C: compression factor
    clip_val: lower bound applied to ``x`` before taking the logarithm
    """
    floored = torch.clamp(x, min=clip_val)
    scaled = floored * C
    return torch.log(scaled)
|
87 |
+
|
88 |
+
|
89 |
+
def dynamic_range_decompression(x, C=1):
    """
    Undo the logarithmic compression: exponentiate ``x`` and divide by ``C``.

    PARAMS
    ------
    x: log-compressed tensor
    C: compression factor used to compress
    """
    expanded = torch.exp(x)
    return expanded / C
|
background_images/wallpaper_test.jpg
ADDED
background_images/wallpaper_test_1_crop.jpg
ADDED
background_images/wallpaper_test_1_crop_2.jpg
ADDED
background_images/wallpaper_test_1_crop_3.jpg
ADDED
background_images/wallpaper_test_2.jpg
ADDED
background_images/wallpaper_test_2_crop.jpg
ADDED
background_images/wallpaper_test_mod.jpg
ADDED
background_images/wallpaper_test_mod_2.jpg
ADDED
data_preparation.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import torch
|
5 |
+
import torch.utils.data
|
6 |
+
|
7 |
+
import nn_layers
|
8 |
+
from scipy.io.wavfile import read
|
9 |
+
from text import text_to_sequence
|
10 |
+
from hyper_parameters import tacotron_params
|
11 |
+
|
12 |
+
torch.manual_seed(1234)
|
13 |
+
|
14 |
+
|
15 |
+
class DataPreparation(torch.utils.data.Dataset):
    """Dataset that turns (wav path, sentence) entries into training pairs.

    Each item is a ``(text sequence, mel spectrogram)`` tuple computed on
    demand from the corresponding entry of ``audiopaths_and_text``.
    """

    def __init__(self, audiopaths_and_text, tacotron_hyperparams):
        """Store the entries and hyper-parameters and build the STFT front end.

        The entry list is shuffled in place after seeding the global
        ``random`` module with 1234.
        """
        self.audiopaths_and_text = audiopaths_and_text
        self.audio_text_parameters = tacotron_hyperparams
        hp = tacotron_hyperparams
        self.stft = nn_layers.TacotronSTFT(
            hp['filter_length'],
            hp['hop_length'],
            hp['win_length'],
            hp['n_mel_channels'],
            self.audio_text_parameters['sampling_rate'],
            hp['mel_fmin'],
            hp['mel_fmax'],
        )
        random.seed(1234)
        random.shuffle(self.audiopaths_and_text)

    def load_audiowav_torch(self, audiopath, samp_rate):
        """Read a wav file and return its samples as a float32 tensor.

        Fails an assertion when the file's sample rate differs from
        ``samp_rate``.
        """
        file_rate, samples = read(audiopath)
        assert samp_rate == file_rate, "Sample rate does not match with the configuration"
        as_float = samples.astype(np.float32)
        return torch.FloatTensor(as_float)

    def melspec_textSequence_pair(self, audiopath_and_text):
        """Build the (integer text sequence, mel spectrogram) pair of one entry."""
        wav_path = audiopath_and_text[0]
        sentence = audiopath_and_text[1]
        params = self.audio_text_parameters

        # Audio: load, scale by the configured maximum value, add a leading
        # dimension for the STFT module, then drop it again from the result.
        waveform = self.load_audiowav_torch(wav_path, params['sampling_rate'])
        scaled = (waveform / params['max_wav_value']).unsqueeze(0)
        scaled = torch.autograd.Variable(scaled, requires_grad=False)
        mel_spec = self.stft.mel_spectrogram(scaled)
        mel_spec = mel_spec.squeeze(0)

        # Text: clean and encode the sentence as integer symbol ids.
        symbol_ids = text_to_sequence(sentence, params['text_cleaners'])
        sentence_sequence = torch.IntTensor(symbol_ids)

        return sentence_sequence, mel_spec

    def __getitem__(self, index):
        """Return the training pair for the entry at ``index``."""
        entry = self.audiopaths_and_text[index]
        return self.melspec_textSequence_pair(entry)

    def __len__(self):
        """Return the number of (wav path, sentence) entries."""
        return len(self.audiopaths_and_text)
|
52 |
+
|
53 |
+
|
54 |
+
class DataCollate:
    """Collate (text sequence, mel spectrogram) pairs into zero-padded batches."""

    def __init__(self, number_frames_step):
        # The padded target length is rounded up to a multiple of this value.
        self.number_frames_step = number_frames_step

    def __call__(self, batch):
        """Pad and stack a list of (text sequence, mel spectrogram) items.

        Items are ordered by decreasing text length. Returns, in order:
        padded sentences, sorted input lengths, padded mel spectrograms,
        gate targets, mel lengths, and a padded prosody tensor holding the
        same values as the padded mel spectrograms.
        """
        batch_size = len(batch)
        text_lengths = torch.LongTensor([len(item[0]) for item in batch])
        inp_lengths, order = torch.sort(text_lengths, dim=0, descending=True)
        longest_text = int(inp_lengths[0])
        ordered = [batch[position] for position in order.tolist()]

        # Text sequences: right-padded with zeros up to the longest sentence.
        sentences_padded = torch.zeros(batch_size, longest_text, dtype=torch.long)
        for row, item in enumerate(ordered):
            tokens = item[0]
            sentences_padded[row, :tokens.size(0)] = tokens

        # Number of mel filterbank channels, taken from the first item.
        num_melfilters = batch[0][1].size(0)

        # Longest spectrogram in the batch, rounded up to a multiple of the
        # number of frames per step.
        max_length_target = max(item[1].size(1) for item in batch)
        remainder = max_length_target % self.number_frames_step
        if remainder != 0:
            max_length_target += self.number_frames_step - remainder
            assert max_length_target % self.number_frames_step == 0

        melspec_padded = torch.zeros(batch_size, num_melfilters, max_length_target,
                                     dtype=torch.float32)
        gate_padded = torch.zeros(batch_size, max_length_target, dtype=torch.float32)
        output_lengths = torch.zeros(batch_size, dtype=torch.long)

        for row, item in enumerate(ordered):
            melspec = item[1]
            frames = melspec.size(1)
            melspec_padded[row, :, :frames] = melspec
            # Gate is 1 from the last real frame through the padding.
            gate_padded[row, frames - 1:] = 1
            output_lengths[row] = frames

        # GST prosody reference: a separate tensor with the same padded content.
        prosody_padded = melspec_padded.clone()

        return sentences_padded, inp_lengths, melspec_padded, gate_padded, output_lengths, prosody_padded
|
distributed.py
ADDED
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.distributed as dist
|
3 |
+
from torch.nn.modules import Module
|
4 |
+
from torch.autograd import Variable
|
5 |
+
|
6 |
+
|
7 |
+
def _flatten_dense_tensors(tensors):
|
8 |
+
"""Flatten dense tensors into a contiguous 1D buffer. Assume tensors are of
|
9 |
+
same dense type.
|
10 |
+
Since inputs are dense, the resulting tensor will be a concatenated 1D
|
11 |
+
buffer. Element-wise operation on this buffer will be equivalent to
|
12 |
+
operating individually.
|
13 |
+
Arguments:
|
14 |
+
tensors (Iterable[Tensor]): dense tensors to flatten.
|
15 |
+
Returns:
|
16 |
+
A contiguous 1D buffer containing input tensors.
|
17 |
+
"""
|
18 |
+
if len(tensors) == 1:
|
19 |
+
return tensors[0].contiguous().view(-1)
|
20 |
+
flat = torch.cat([t.contiguous().view(-1) for t in tensors], dim=0)
|
21 |
+
return flat
|
22 |
+
|
23 |
+
|
24 |
+
def _unflatten_dense_tensors(flat, tensors):
|
25 |
+
"""View a flat buffer using the sizes of tensors. Assume that tensors are of
|
26 |
+
same dense type, and that flat is given by _flatten_dense_tensors.
|
27 |
+
Arguments:
|
28 |
+
flat (Tensor): flattened dense tensors to unflatten.
|
29 |
+
tensors (Iterable[Tensor]): dense tensors whose sizes will be used to
|
30 |
+
unflatten flat.
|
31 |
+
Returns:
|
32 |
+
Unflattened dense tensors with sizes same as tensors and values from
|
33 |
+
flat.
|
34 |
+
"""
|
35 |
+
outputs = []
|
36 |
+
offset = 0
|
37 |
+
for tensor in tensors:
|
38 |
+
numel = tensor.numel()
|
39 |
+
outputs.append(flat.narrow(0, offset, numel).view_as(tensor))
|
40 |
+
offset += numel
|
41 |
+
return tuple(outputs)
|
42 |
+
|
43 |
+
|
44 |
+
'''
|
45 |
+
This version of DistributedDataParallel is designed to be used in conjunction with the multiproc.py
|
46 |
+
launcher included with this example. It assumes that your run is using multiprocess with 1
|
47 |
+
GPU/process, that the model is on the correct device, and that torch.cuda.set_device has been
|
48 |
+
used to set the device.
|
49 |
+
Parameters are broadcasted to the other processes on initialization of DistributedDataParallel,
|
50 |
+
and will be allreduced at the finish of the backward pass.
|
51 |
+
'''
|
52 |
+
|
53 |
+
|
54 |
+
class DistributedDataParallel(Module):
    """Wrap a module so its gradients are averaged across processes.

    On construction every tensor in the wrapped module's ``state_dict`` is
    broadcast from rank 0. A hook on each trainable parameter queues a
    callback on the autograd engine; the first callback to run after a
    ``forward`` call all-reduces the gradients and divides them by the
    world size.

    Arguments:
        module (Module): the module to wrap; its forward is called unchanged.
    """

    def __init__(self, module):
        super(DistributedDataParallel, self).__init__()
        # fallback for PyTorch 0.3
        # NOTE(review): also fall back when the legacy `dist_backend` alias is
        # not exported, instead of failing with AttributeError -- confirm
        # which PyTorch releases still expose it.
        legacy_backends = getattr(dist, 'dist_backend', None)
        if not hasattr(dist, '_backend') or legacy_backends is None:
            self.warn_on_half = True
        else:
            self.warn_on_half = bool(dist._backend == legacy_backends.GLOO)

        self.module = module
        # Defined up front so a backward pass that is not preceded by this
        # wrapper's forward() finds the flag instead of raising AttributeError.
        self.needs_reduction = False

        for p in self.module.state_dict().values():
            if not torch.is_tensor(p):
                continue
            dist.broadcast(p, 0)

        def allreduce_params():
            # Every parameter hook queues this callback; only the first call
            # after a forward() does the work.
            if not self.needs_reduction:
                return
            self.needs_reduction = False

            # Bucket by the tensor type string (e.g. 'torch.cuda.HalfTensor').
            # type(param.data) is the generic torch.Tensor class on
            # PyTorch >= 0.4, which would merge every parameter into a single
            # bucket and hide half-precision parameters from the check below.
            buckets = {}
            for param in self.module.parameters():
                if param.requires_grad and param.grad is not None:
                    buckets.setdefault(param.data.type(), []).append(param)

            if self.warn_on_half:
                if 'torch.cuda.HalfTensor' in buckets:
                    print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                          " It is recommended to use the NCCL backend in this case. This currently requires" +
                          "PyTorch built from top of tree master.")
                    self.warn_on_half = False

            for bucket in buckets.values():
                grads = [param.grad.data for param in bucket]
                # One collective per bucket: flatten, sum, average, copy back.
                coalesced = _flatten_dense_tensors(grads)
                dist.all_reduce(coalesced)
                coalesced /= dist.get_world_size()
                for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
                    buf.copy_(synced)

        def allreduce_hook(*unused):
            # Same engine access as apply_gradient_allreduce; does not depend
            # on any particular parameter object.
            Variable._execution_engine.queue_callback(allreduce_params)

        for param in list(self.module.parameters()):
            if param.requires_grad:
                param.register_hook(allreduce_hook)

    def forward(self, *inputs, **kwargs):
        """Arm the gradient reduction and delegate to the wrapped module."""
        self.needs_reduction = True
        return self.module(*inputs, **kwargs)
|
106 |
+
|
107 |
+
'''
|
108 |
+
def _sync_buffers(self):
|
109 |
+
buffers = list(self.module._all_buffers())
|
110 |
+
if len(buffers) > 0:
|
111 |
+
# cross-node buffer sync
|
112 |
+
flat_buffers = _flatten_dense_tensors(buffers)
|
113 |
+
dist.broadcast(flat_buffers, 0)
|
114 |
+
for buf, synced in zip(buffers, _unflatten_dense_tensors(flat_buffers, buffers)):
|
115 |
+
buf.copy_(synced)
|
116 |
+
def train(self, mode=True):
|
117 |
+
# Clear NCCL communicator and CUDA event cache of the default group ID,
|
118 |
+
# These cache will be recreated at the later call. This is currently a
|
119 |
+
# work-around for a potential NCCL deadlock.
|
120 |
+
if dist._backend == dist.dist_backend.NCCL:
|
121 |
+
dist._clear_group_cache()
|
122 |
+
super(DistributedDataParallel, self).train(mode)
|
123 |
+
self.module.train(mode)
|
124 |
+
'''
|
125 |
+
|
126 |
+
|
127 |
+
'''
|
128 |
+
Modifies existing model to do gradient allreduce, but doesn't change class
|
129 |
+
so you don't need "module"
|
130 |
+
'''
|
131 |
+
|
132 |
+
|
133 |
+
def apply_gradient_allreduce(module):
    """Add gradient all-reduce to an existing module without wrapping it.

    Broadcasts every tensor in the module's ``state_dict`` from rank 0,
    registers a hook on each trainable parameter that queues the gradient
    reduction on the autograd engine, and registers a forward hook that
    arms the reduction on every forward pass.

    Arguments:
        module (Module): module to modify in place.
    Returns:
        The same module object.
    """
    # NOTE(review): also fall back when the legacy `dist_backend` alias is not
    # exported, instead of failing with AttributeError -- confirm which
    # PyTorch releases still expose it.
    legacy_backends = getattr(dist, 'dist_backend', None)
    if not hasattr(dist, '_backend') or legacy_backends is None:
        module.warn_on_half = True
    else:
        module.warn_on_half = bool(dist._backend == legacy_backends.GLOO)

    # Make sure the flag exists before any backward pass can read it; an
    # already present value is left alone.
    if not hasattr(module, 'needs_reduction'):
        module.needs_reduction = False

    for p in module.state_dict().values():
        if not torch.is_tensor(p):
            continue
        dist.broadcast(p, 0)

    def allreduce_params():
        # Every parameter hook queues this callback; only the first call
        # after a forward pass does the work.
        if not module.needs_reduction:
            return
        module.needs_reduction = False

        # Bucket by the tensor type string (e.g. 'torch.cuda.HalfTensor').
        # type(param.data) is the generic torch.Tensor class on
        # PyTorch >= 0.4, which would merge every parameter into a single
        # bucket and hide half-precision parameters from the check below.
        buckets = {}
        for param in module.parameters():
            if param.requires_grad and param.grad is not None:
                buckets.setdefault(param.data.type(), []).append(param)

        if module.warn_on_half:
            if 'torch.cuda.HalfTensor' in buckets:
                print("WARNING: gloo dist backend for half parameters may be extremely slow." +
                      " It is recommended to use the NCCL backend in this case. This currently requires" +
                      "PyTorch built from top of tree master.")
                module.warn_on_half = False

        for bucket in buckets.values():
            grads = [param.grad.data for param in bucket]
            # One collective per bucket: flatten, sum, average, copy back.
            coalesced = _flatten_dense_tensors(grads)
            dist.all_reduce(coalesced)
            coalesced /= dist.get_world_size()
            for buf, synced in zip(grads, _unflatten_dense_tensors(coalesced, grads)):
                buf.copy_(synced)

    def allreduce_hook(*unused):
        Variable._execution_engine.queue_callback(allreduce_params)

    for param in list(module.parameters()):
        if param.requires_grad:
            param.register_hook(allreduce_hook)

    def set_needs_reduction(self, inputs, output):
        # Forward hook: arm the reduction for the upcoming backward pass.
        self.needs_reduction = True

    module.register_forward_hook(set_needs_reduction)
    return module
|
examples_taco2.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ready-made inputs for quick inference runs. Each entry is a list holding the
# sentence to synthesise followed by four numeric values.
# NOTE(review): the meaning of the four numbers is not visible in this file
# (presumably style weights plus an index) -- confirm against the caller.
infer_from_text_examples = [
    [
        "The most important qualities of a speech synthesis system are naturalness and intelligibility.",
        0.45, 0.27, 0.29, 0,
    ],
    [
        'Lego said it remains "fully committed" to making bricks from sustainable materials.',
        0.29, 0.44, 0.28, 0,
    ],
    [
        "Sensors can also be put in place so that the pollination takes place on the optimum days.",
        0.2, 0.42, 0.4, 0,
    ],
]
|
filelists/ljs_audio_text_test_filelist.txt
ADDED
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0096.wav|Mrs. De Mohrenschildt thought that Oswald,
|
2 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0022.wav|The Secret Service believed that it was very doubtful that any President would ride regularly in a vehicle with a fixed top, even though transparent.
|
3 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0042.wav|Between the hours of eight and nine p.m. they were occupied with the children in the bedrooms located at the extreme east end of the house.
|
4 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0117.wav|The prisoner had nothing to deal with but wooden panels, and by dint of cutting and chopping he got both the lower panels out.
|
5 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ025-0157.wav|Under these circumstances, unnatural as they are, with proper management, the bean will thrust forth its radicle and its plumule;
|
6 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0219.wav|Oswald demonstrated his thinking in connection with his return to the United States by preparing two sets of identical questions of the type which he might have thought
|
7 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0164.wav|it is not possible to state with scientific certainty that a particular small group of fibers come from a certain piece of clothing
|
8 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0092.wav|has confidence in the dedicated Secret Service men who are ready to lay down their lives for him
|
9 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0118.wav|Since these agencies are already obliged constantly to evaluate the activities of such groups,
|
10 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0016.wav|Jeanne De Mohrenschildt said, quote,
|
11 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0078.wav|no economic panacea, which could simply revive over-night the heavy industries and the trades dependent upon them.
|
12 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0148.wav|Examination of the cartridge cases found on the sixth floor of the Depository Building
|
13 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0202.wav|testified that the information available to the Federal Government about Oswald before the assassination would, if known to PRS,
|
14 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0056.wav|It is an easy document to understand when you remember that it was called into being
|
15 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0025.wav|And in many directions, the intervention of that organized control which we call government
|
16 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0105.wav|Communications in the motorcade.
|
17 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0012.wav|with respect to industry and business, but nearly all are agreed that private enterprise in times such as these
|
18 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0169.wav|and one or two men were allowed to mend clothes and make shoes. The rules made by the Secretary of State were hung up in conspicuous parts of the prison;
|
19 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0088.wav|It just is an aid in seeing in the fact that you only have the one element, the crosshair,
|
20 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0192.wav|"I think I could do that sort of job," said Calcraft, on the spur of the moment.
|
21 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0142.wav|was strewn in front of the dock, and sprinkled it towards the bench with a contemptuous gesture.
|
22 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0015.wav|Weedon and Lecasser to twelve and six months respectively in Coldbath Fields.
|
23 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0033.wav|Prior to November twenty-two, nineteen sixty-three
|
24 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0349.wav|who were each required to send so large a number to Babylon, that in all there were collected no fewer than fifty thousand.
|
25 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0197.wav|At first Mrs. Connally thought that her husband had been killed,
|
26 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0133.wav|Palmer speedily found imitators.
|
27 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0123.wav|Although Brennan testified that the man in the window was standing when he fired the shots, most probably he was either sitting or kneeling.
|
28 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0282.wav|Many years were to elapse before these objections should be fairly met and universally overcome.
|
29 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0204.wav|Special Agent Lyndal L. Shaneyfelt, a photography expert with the FBI,
|
30 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0241.wav|Calcraft served the city of London till eighteen seventy-four, when he was pensioned at the rate of twenty-five shillings per week.
|
31 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0033.wav|we will not allow ourselves to run around in new circles of futile discussion and debate, always postponing the day of decision.
|
32 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0286.wav|There has never been much science in the system of carrying out the extreme penalty in this country; the "finisher of the law"
|
33 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0181.wav|he had his pockets filled with bread and cheese, and it was generally supposed that he had come a long distance to see the fatal show.
|
34 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0052.wav|to the value of twenty thousand pounds.
|
35 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0314.wav|Sir George Grey thought there was a growing feeling in favor of executions within the prison precincts.
|
36 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0056.wav|From August nineteen sixty-two
|
37 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0027.wav|Nor did the methods by which they were perpetrated greatly vary from those in times past.
|
38 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0065.wav|At the former the "Provisional Government" was to be established,
|
39 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0113.wav|The Commission has concluded that at the time of the assassination
|
40 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0410.wav|There among the ruins they still live in the same kind of houses,
|
41 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0137.wav|More seriously, the facts of his defection had become known, leaving him open to almost unanswerable attack by those who opposed his views.
|
42 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0215.wav|One by one the huge uprights of black timber were fitted together,
|
43 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0084.wav|or when the press of the crowd made it impossible for the escort motorcycles to stay in position on the car's rear flanks.
|
44 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ020-0092.wav|Have yourself called on biscuit mornings an hour earlier than usual.
|
45 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0096.wav|On November fourteen, Lawson and Sorrels attended a meeting at Love Field
|
46 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0308.wav|and others who swore to the meetings of the conspirators and their movements. Saward was found guilty,
|
47 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0067.wav|But Mrs. Solomons could not resist the temptation to dabble in stolen goods, and she was found shipping watches of the wrong category to New York.
|
48 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0231.wav|namely, to suppress it and substitute another.
|
49 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0265.wav|and later he became manager of the newly rebuilt Olympic at Wych Street.
|
50 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0102.wav|would be the first to exclaim as soon as an amendment was proposed
|
51 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0233.wav|it consists of several circular perforations, about two inches in diameter,
|
52 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0213.wav|This seems to have decided Courvoisier,
|
53 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0045.wav|This price included nineteen dollars, ninety-five cents for the rifle and the scope, and one dollar, fifty cents for postage and handling.
|
54 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0048.wav|Wherefore let him that thinketh he standeth take heed lest he fall," and was full of the most pointed allusions to the culprit.
|
55 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0294.wav|It was frequently stated in evidence that the jail of the borough was in so unfit a state for the reception of prisoners,
|
56 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0007.wav|There were others less successful.
|
57 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0138.wav|perhaps the tales that travelers told him were exaggerated as travelers' tales are likely to be,
|
58 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0029.wav|that is reflected in definite and comprehensive operating procedures.
|
59 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0121.wav|The prisoners were in due course transferred to Newgate, to be put upon their trial at the Central Criminal Court.
|
60 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0146.wav|They had to handcuff her by force against the most violent resistance, and still she raged and stormed,
|
61 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0111.wav|The Secret Service has attempted to perform this function through the activities of its Protective Research Section
|
62 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0257.wav|But the affair still remained a profound mystery. No light was thrown upon it till, towards the end of March,
|
63 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0260.wav|Yet the public opinion of the whole body seems to have checked dissipation.
|
64 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0014.wav|the Presidential limousine arrived at the emergency entrance of the Parkland Hospital at about twelve:thirty-five p.m.
|
65 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0093.wav|Oswald was arrested and jailed by the New Orleans Police Department for disturbing the peace, in connection with a street fight which broke out when he was accosted
|
66 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0324.wav|gaming of all sorts should be peremptorily forbidden under heavy pains and penalties.
|
67 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0115.wav|we have reached into the heart of the problem which is to provide such annual earnings for the lowest paid worker as will meet his minimum needs.
|
68 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0191.wav|it had established periodic regular review of the status of four hundred individuals;
|
69 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0197.wav|who was one of the first witnesses to alert the police to the Depository as the source of the shots, as has been discussed in chapter three.
|
70 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0253.wav|were governed by rules which they themselves had framed, and under which subscriptions were levied
|
71 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0288.wav|might have been more alert in the Dallas motorcade if they had retired promptly in Fort Worth.
|
72 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0112.wav|Many of the old customs once prevalent in the State Side, so properly condemned and abolished,
|
73 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0189.wav|who was presently attacked in the same way as the others, but, but, thanks to the prompt administration of remedies, he recovered.
|
74 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0230.wav|basically, although I hate the USSR and socialist system I still think marxism can work under different circumstances, end quote.
|
75 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0161.wav|The Secret Service should not and does not plan to develop its own intelligence gathering facilities to duplicate the existing facilities of other Federal agencies.
|
76 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0011.wav|that not more than one bottle of wine or one quart of beer could be issued at one time. No account was taken of the amount of liquors admitted in one day,
|
77 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0206.wav|and caused a number of stout additional barriers to be erected in front of the scaffold,
|
78 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0261.wav|The poorer prisoners were not in abject want, as in other prisons,
|
79 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0189.wav|Hunt, in consideration of the information he had given, escaped death, and was sentenced to transportation for life.
|
80 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0317.wav|The former, which consisted principally of the tread-wheel, cranks, capstans, shot-drill,
|
81 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0041.wav|Visited Mr. Fauntleroy. My application for books for him not having been attended, I had no prayer-book to give him.
|
82 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0089.wav|That is not only my accusation.
|
83 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0224.wav|would not agree with that particular wording, end quote.
|
84 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0104.wav|He found them at length residing at the latter place, one as a landed proprietor, the other as a publican.
|
85 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0055.wav|The jury did not believe him, and the verdict was for the defendants.
|
86 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0306.wav|These had been attributed to political action; some thought that the large purchases in foreign grains, effected at losing prices,
|
87 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0052.wav|To supplement the PRS files, the Secret Service depends largely on local police departments and local offices of other Federal agencies
|
88 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0459.wav|Its bricks, measuring about thirteen inches square and three inches in thickness, were burned and stamped with the usual short inscription:
|
89 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0183.wav|Soon afterwards Dixon died, showing all the symptoms already described.
|
90 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0084.wav|At length the ordinary pauses, and then, in a deep tone, which, though hardly above a whisper, is audible to all, says,
|
91 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0170.wav|That in this vast metropolis, the center of wealth, civilization, and information;
|
92 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0277.wav|This is proved by contemporary accounts, especially one graphic and realistic article which appeared in the 'Times,'
|
93 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0061.wav|He staggers towards the pew, reels into it, stumbles forward, flings himself on the ground, and, by a curious twist of the spine,
|
94 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0201.wav|to select a sufficiently spacious piece of ground, and erect a prison which from foundations to roofs should be in conformity with the newest ideas.
|
95 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0063.wav|He had repeated this wish only a few days before, during his visit to Tampa, Florida.
|
96 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0257.wav|a third miscreant made a similar but far less serious attempt in the month of July following.
|
97 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0106.wav|The keeper tries to appear unmoved, but his eye wanders anxiously over the combustible assembly.
|
98 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0121.wav|After the construction and action of the machine had been explained, the doctor asked the governor what kind of men he had commanded at Goree,
|
99 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0069.wav|the Secret Service had received from the FBI some nine thousand reports on members of the Communist Party.
|
100 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0202.wav|The news-vendor was also a tobacconist,
|
101 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0230.wav|Shortly before the day fixed for execution, Bishop made a full confession, the bulk of which bore the impress of truth,
|
102 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0248.wav|and stated that in his opinion Newgate, as the common jail of Middlesex, was wholly inadequate to the proper confinement of its prisoners.
|
103 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0053.wav|who had been greatly upset by her experience, was able to view a lineup of four men handcuffed together at the police station.
|
104 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0177.wav|For the first time
|
105 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0036.wav|it was hoped that their rulers would hire accommodation in the county prisons, and that the inferior establishments would in course of time disappear.
|
106 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0054.wav|carbohydrates (starch, cellulose) and fats.
|
107 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ020-0085.wav|Break apart from one another and pile on a plate, throwing a clean doily or a small napkin over them. Break open at table.
|
108 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0226.wav|The several military intelligence agencies reported crank mail and similar threats involving the President.
|
109 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0233.wav|he shot an old soldier who had attempted to detain him. He was convicted and executed.
|
110 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0152.wav|The portion of the palm which was identified was the heel of the right palm, i.e., the area near the wrist, on the little finger side.
|
111 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0009.wav|as indefatigable and self-sacrificing, found by personal visitation that the condition of jails throughout the kingdom was,
|
112 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0134.wav|Within a few weeks occurred the Leeds poisoning case, in which the murderer undoubtedly was inspired by the facts made public at Palmer's trial.
|
113 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0318.wav|was to be the rule for all convicted prisoners throughout the early stages of their detention;
|
114 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ020-0093.wav|Rise, wash face and hands, rinse the mouth out and brush back the hair.
|
115 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0188.wav|Probert was then admitted as a witness, and the case was fully proved against Thurtell, who was hanged in front of Hertford Jail.
|
116 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0202.wav|The preference given to the Pentonville system destroyed all hopes of a complete reformation of Newgate.
|
117 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0027.wav|Oswald's revolver
|
118 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0176.wav|He admitted to fantasies about being powerful and sometimes hurting and killing people, but refused to elaborate on them.
|
119 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0354.wav|Doubts were long entertained whether Thomas Wainwright,
|
120 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0185.wav|From the Presidential airplane, the Vice President telephoned Attorney General Robert F. Kennedy,
|
121 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0137.wav|They were not obliged to attend chapel, and seldom if ever went; "prisoners," said one of them under examination, "did not like the trouble of going to chapel."
|
122 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0085.wav|The Hidell signature on the notice of classification was in the handwriting of Oswald.
|
123 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0037.wav|the schoolmaster and the juvenile prisoners being seated round the communion-table, opposite the pulpit.
|
124 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0021.wav|Later on he had devoted himself to the personal investigation of the prisons of the United States.
|
125 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0082.wav|and this particular official took excellent care to select as residents for his own ward those most suitable from his own point of view.
|
126 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0380.wav|with hope to the last. There is always the chance of a flaw in the indictment, of a missing witness, or extenuating circumstances.
|
127 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0344.wav|monitor, or schoolmaster, nor to be engaged in the service of any officer of the prison.
|
128 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0161.wav|These disciplinary improvements were, however, only slowly and gradually introduced.
|
129 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0145.wav|And here I may not omit to tell the use to which the mould dug out of the great moat was turned, nor the manner wherein the wall was wrought.
|
130 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0349.wav|His disclaimer, distinct and detailed on every point, was intended simply for effect.
|
131 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0010.wav|Some of the members of that group saw a good deal of the Oswalds through the fall of nineteen sixty-three,
|
132 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ027-0178.wav|These were undoubtedly perennibranchs. In the Permian and Triassic higher forms appeared, which were certainly caducibranch.
|
133 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ041-0070.wav|He did not rise above the rank of private first class, even though he had passed a qualifying examination for the rank of corporal.
|
134 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0266.wav|Thus in the years between May first, eighteen twenty-seven, and thirtieth April, eighteen thirty-one,
|
135 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0091.wav|In this recent reorganization we have recognized three distinct functions:
|
136 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0129.wav|which marked the growth of public interest in prison affairs, and which was the germ of the new system
|
137 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0215.wav|William Roupell was the eldest but illegitimate son of a wealthy man who subsequently married Roupell's mother, and had further legitimate issue.
|
138 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0194.wav|and behaved so as to justify a belief that he had been a jail-bird all his life.
|
139 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0137.wav|that numbers of men, "lifers," and others with ten, fourteen, or twenty years to do, can be trusted to work out of doors without bolts and bars
|
140 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0289.wav|the latter raised eighteen pence among them to pay for a truss of straw for the poor woman to lie on.
|
141 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0016.wav|In nineteen thirty-three you and I knew that we must never let our economic system get completely out of joint again
|
142 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0141.wav|There were at the moment in Newgate six convicts sentenced to death for forging wills.
|
143 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0283.wav|to do them mere justice, there was at least till then a half-drunken ribald gaiety among the crowd that made them all akin."
|
144 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0082.wav|The only interval was the time necessary to ride in the elevator from the second to the sixth floor and walk back to the southeast corner.
|
145 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0194.wav|Anyone who was familiar with that area of Dallas would have known that the motorcade would probably pass the Texas School Book Depository to get from Main Street
|
146 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0124.wav|occupied when they saw it last, but a few hours ago, by their comrades who are now dead;
|
147 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0162.wav|In the Presidential Limousine
|
148 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0223.wav|The plan provides for an additional two hundred five agents for the Secret Service. Seventeen of this number are proposed for the Protective Research Section;
|
149 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0228.wav|their harsh and half-cracked voices full of maudlin, besotted sympathy for those about to die.
|
150 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0096.wav|The eight courts above enumerated were well supplied with water;
|
151 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0288.wav|After this the other conspirators traveled to obtain genuine bills and master the system of the leading houses at home and abroad.
|
152 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0106.wav|in which latterly a copper had been fixed for the cooking of provisions sent in by charitable persons.
|
153 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ025-0129.wav|On each lobe of the bi-lobed leaf of Venus flytrap are three delicate filaments which stand out at right angles from the surface of the leaf.
|
154 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0013.wav|Hands Off Cuba, end quote, an application form for, and a membership card in,
|
155 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0115.wav|of the person who is actually in the exercise of the executive power, or
|
156 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0145.wav|But reformation was only skin deep. Below the surface many of the old evils still rankled.
|
157 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0355.wav|came up in all respects to modern requirements.
|
158 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0289.wav|There was unrestrained association of untried and convicted, juvenile with adult prisoners, vagrants, misdemeanants, felons.
|
159 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0222.wav|in Fort Worth, there occurred a breach of discipline by some members of the Secret Service who were officially traveling with the President.
|
160 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0367.wav|Under the new system the whole of the arrangements from first to last fell upon the officers.
|
161 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0097.wav|Agent Quigley did not know of Oswald's prior FBI record when he interviewed him,
|
162 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0075.wav|as effectually to rebuke and abash the profane spirit of the more insolent and daring of the criminals.
|
163 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0022.wav|provided by other agencies.
|
164 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0085.wav|at Newgate and York Castle as long as five years; "at Ilchester and Morpeth for seven years; at Warwick for eight years,
|
165 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0075.wav|Hosty had inquired earlier and found no evidence that it was functioning in the Dallas area.
|
166 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0098.wav|One was the "yeoman of the halter," a Newgate official, the executioner's assistant, whom Mr. J. T. Smith, who was present at the execution,
|
167 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0102.wav|The second attack was fatal, and ended in Cook's death from tetanus.
|
168 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0105.wav|Second, the adequacy of other advance preparations for the security of the President, during his visit to Dallas,
|
169 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0206.wav|He was a tall, slender man, with a long face and iron-gray hair.
|
170 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0271.wav|Whether it was greed or a quarrel that drove Greenacre to the desperate deed remains obscure.
|
171 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0086.wav|with such further separation as the justices should deem conducive to good order and discipline.
|
172 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0097.wav|and considerably better living quarters than those accorded to Soviet citizens of equal age and station.
|
173 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0126.wav|we would handle it in due course, in accord with the whole context of the investigation. End quote.
|
174 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ041-0022.wav|Oswald first wrote, quote, Edward Vogel, end quote, an obvious misspelling of Voebel's name,
|
175 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0025.wav|The bank enjoyed an excellent reputation, it had a good connection, and was supposed to be perfectly sound.
|
176 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0194.wav|But Burke and Hare had their imitators further south,
|
177 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0416.wav|(if man may speak so confidently of His great impenetrable counsels), for an eternal Testimony of His great work in the confusion of Man's pride,
|
178 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0130.wav|are all huddled together without discrimination, oversight, or control."
|
179 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0005.wav|About this time Davidson and Gordon, the people above-mentioned,
|
180 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0125.wav|with this, placed against the wall near the chevaux-de-frise, he made an escalade.
|
181 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0224.wav|As Dwyer survived, Cannon escaped the death sentence, which was commuted to penal servitude for life.
|
182 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0019.wav|refuted by abundant evidence, and having no foundation whatever in truth.
|
183 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0221.wav|With either great ambivalence, or cold calculation he prepared completely different answers to the same questions.
|
184 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0063.wav|which was generally more formally Gothic than the printing of the German workmen,
|
185 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0006.wav|They took off in the Presidential plane, Air Force One, at eleven a.m., arriving at San Antonio at one:thirty p.m., Eastern Standard Time.
|
186 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0054.wav|democracy will have failed far beyond the importance to it of any king of precedent concerning the judiciary.
|
187 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0044.wav|the same callous indifference to the moral well-being of the prisoners, the same want of employment and of all disciplinary control.
|
188 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0154.wav|four point eight to five point six seconds if the second shot missed,
|
189 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0090.wav|they seem unduly restrictive in continuing to require some manifestation of animus against a Government official.
|
190 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0421.wav|it was the beginning of the great collections of Babylonian antiquities in the museums of the Western world.
|
191 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0205.wav|then I would say the possibility exists, these fibers could have come from this blanket, end quote.
|
192 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0335.wav|The books and journals he was to keep were minutely specified, and his constant presence in or near the jail was insisted upon.
|
193 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0045.wav|Wallace's relations warned him against his Liverpool friend,
|
194 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0002.wav|Chapter four. The Assassin: Part six.
|
195 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0159.wav|This was all the police wanted to know.
|
196 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0140.wav|In the plant as in the animal metabolism must consist of anabolic and catabolic processes.
|
197 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0171.wav|I will briefly describe one or two of the more remarkable murders in the years immediately following, then pass on to another branch of crime.
|
198 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0007.wav|Three others subsequently identified Oswald from a photograph.
|
199 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0174.wav|microscopic and UV (ultra violet) characteristics, end quote.
|
200 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0110.wav|he apparently adjusted well enough there to have had an average, although gradually deteriorating, school record
|
201 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0192.wav|he had a total of between four point eight and five point six seconds between the two shots which hit
|
202 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0261.wav|When he appeared before the Commission, Michael Paine lifted the blanket
|
203 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0097.wav|Lee was brought up in this atmosphere of constant money problems, and I am sure it had quite an effect on him, and also Robert, end quote.
|
204 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0249.wav|Mrs. Earlene Roberts, the housekeeper at Oswald's roominghouse and the last person known to have seen him before he reached tenth Street and Patton Avenue,
|
205 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0248.wav|Marwood was proud of his calling, and when questioned as to whether his process was satisfactory, replied that he heard "no complaints."
|
206 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0083.wav|As Mr. Buxton pointed out, many old acts of parliament designed to protect the prisoner were still in full force.
|
207 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0029.wav|This was Delarue's watch, fully identified as such, which Hocker told his brother Delarue had given him the morning of the murder.
|
208 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0110.wav|have been best calculated to promote industrial recovery and a permanent improvement of business and labor conditions.
|
209 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0107.wav|he slept in the same bed with a highwayman on one side, and a man charged with murder on the other.
|
210 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0076.wav|Ronald Simmons, chief of the U.S. Army Infantry Weapons Evaluation Branch of the Ballistics Research Laboratory, said, quote,
|
211 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0347.wav|had undoubtedly a solemn, impressive effect upon those outside.
|
212 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0072.wav|After the end of the fifteenth century the degradation of printing, especially in Germany and Italy,
|
213 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0018.wav|Consequently, although there never can be more than fifteen, there may be only fourteen, or thirteen, or twelve.
|
214 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0180.wav|that the fibers were caught in the crevice of the rifle's butt plate, quote, in the recent past, end quote,
|
215 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0083.wav|and measures taken to arrest them when their plans were so far developed that no doubt could remain as to their guilt.
|
216 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0299.wav|and gave the garnish for the common side at that sum, which is five shillings more than Mr. Neild says was extorted on the common side.
|
217 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0143.wav|the Secret Service did not at the time of the assassination have any established procedure governing its relationships with them.
|
218 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0054.wav|Solomons, while waiting to appear in court, persuaded the turnkeys to take him to a public-house, where all might "refresh."
|
219 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0270.wav|Vegetables, especially the potato, that most valuable anti-scorbutic, was too often omitted.
|
220 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0164.wav|three minutes after the shooting.
|
221 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0326.wav|Maltby and Co. would issue warrants on them deliverable to the importer, and the goods were then passed to be stored in neighboring warehouses.
|
222 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0173.wav|The essential point to be remembered is that the ornament, whatever it is, whether picture or pattern-work, should form part of the page,
|
223 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0056.wav|On December twenty-six, nineteen sixty-three, the FBI circulated additional instructions to all its agents,
|
224 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0319.wav|provided only that their security was not jeopardized, and dependent upon the enforcement of another new rule,
|
225 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0040.wav|The fact was that the years as they passed, nearly twenty in all, had worked but little permanent improvement in this detestable prison.
|
226 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0231.wav|His body was found lying in a pool of blood in a night-dress, stabbed over and over again in the left side.
|
227 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0226.wav|One half of the mutineers fell upon him unawares with handspikes and capstan-bars.
|
228 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0239.wav|He had been committed for an offense for which he was acquitted.
|
229 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0112.wav|The Commission also regards the security arrangements worked out by Lawson and Sorrels at Love Field as entirely adequate.
|
230 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0125.wav|that Oswald was a good shot, somewhat better than or equal to -- better than the average let us say.
|
231 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0196.wav|He cried out, quote, Oh, no, no, no. My God, they are going to kill us all, end quote,
|
232 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0228.wav|He was released from Broadmoor in eighteen seventy-eight, and went abroad.
|
233 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0228.wav|On the other hand, he could have traveled some distance with the money he did have and he did return to his room where he obtained his revolver.
|
234 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0168.wav|in the other was the sacred precinct of Jupiter Belus,
|
235 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0140.wav|and in such an effort we should be able to secure for employers and employees and consumers
|
236 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0280.wav|Again the wretched creature succeeded in obtaining foothold, but this time on the left side of the drop.
|
237 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0159.wav|To constitute this the aristocratic quarter, unwarrantable demands were made upon the space properly allotted to the female felons,
|
238 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0274.wav|and the windows of the opposite houses, which commanded a good view, as usual fetched high prices.
|
239 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0014.wav|it sounded high and I immediately kind of looked up,
|
240 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0120.wav|which he believed was where the bag reached when it was laid on the seat with one edge against the door.
|
241 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0015.wav|which Johnson said he did not receive until after the assassination. The letter said in part, quote,
|
242 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0299.wav|the latter end of the nineteenth century, several of which still fall far short of our English ideal,
|
243 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0206.wav|After comparing the rifle in the simulated photograph with the rifle in Exhibit Number one thirty-three A, Shaneyfelt testified, quote,
|
244 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0494.wav|Between the several sections were wide spaces where foot soldiers and charioteers might fight.
|
245 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0099.wav|and report at length upon the condition of the prisons of the country.
|
246 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0144.wav|developed to a colossal extent the frauds he had already practiced as a subordinate.
|
247 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0221.wav|It was intended as far as possible that, except awaiting trial, no prisoner should find himself relegated to Newgate.
|
248 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0088.wav|in one, for seven years -- that of a man sentenced to death, for whom great interest had been made, but whom it was not thought right to pardon.
|
249 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0216.wav|nineteen sixty-three, merely to disarm her and to provide a justification of sorts,
|
250 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0135.wav|that he was not yet twenty years old when he went to the Soviet Union with such high hopes and not quite twenty-three when he returned bitterly disappointed.
|
251 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0196.wav|On the other hand, it is urged that all features of the protection of the President and his family should be committed to an elite and independent corps.
|
252 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0278.wav|This was the well and astutely devised plot of the brothers Bidwell,
|
253 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0238.wav|and then looked around again and saw more of this movement, and so I proceeded to go to the back seat and get on top of him.
|
254 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0309.wav|where probably the money still remains.
|
255 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ041-0199.wav|is shown most clearly by his employment relations after his return from the Soviet Union. Of course, he made his real problems worse to the extent
|
256 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0076.wav|The lax discipline maintained in Newgate was still further deteriorated by the presence of two other classes of prisoners who ought never to have been inmates of such a jail.
|
257 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0118.wav|He had high motivation. He had presumably a good to excellent rifle and good ammunition.
|
258 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0019.wav|And there may be only nine.
|
259 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0085.wav|The fire had not quite burnt out at twelve, in nearly four hours, that is to say.
|
260 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0031.wav|This fixed the crime pretty certainly upon Müller, who had already left the country, thus increasing suspicion under which he lay.
|
261 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0032.wav|Dallas police stood at intervals along the fence and Dallas plain clothes men mixed in the crowd.
|
262 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0004.wav|General Supervision of the Secret Service
|
263 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0096.wav|This is a definite advantage to the shooter, the vehicle moving directly away from him and the downgrade of the street, and he being in an elevated position
|
264 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ041-0195.wav|Oswald's interest in Marxism led some people to avoid him,
|
265 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0158.wav|After a moment's hesitation, she told me that he worked at the Texas School Book Depository near the downtown area of Dallas.
|
266 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0162.wav|In planning its data processing techniques,
|
267 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0051.wav|and paying great attention to the "press work" or actual process of printing,
|
268 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0136.wav|Of all the ancient descriptions of the famous walls and the city they protected, that of Herodotus is the fullest.
|
269 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0134.wav|Shortly after the assassination Brennan noticed
|
270 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0348.wav|Every facility was promised. The sanction of the Secretary of State would not be withheld if plans and estimates were duly submitted,
|
271 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0219.wav|While one stood over the fire with the papers, another stood with lighted torch to fire the house.
|
272 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0245.wav|Mr. Mullay called again, taking with him five hundred pounds in cash. Howard discovered this, and his manner was very suspicious;
|
273 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0035.wav|Organization of the Motorcade
|
274 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0135.wav|While he had drawn some attention to himself and had actually appeared on two radio programs, he had been attacked by Cuban exiles and arrested,
|
275 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0090.wav|He was very much interested in autobiographical works of outstanding statesmen of the United States, to whom his wife thought he compared himself.
|
276 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0034.wav|When any given "protist" has to be classified the case must be decided on its individual merits;
|
277 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0092.wav|as to the fact that he was an outstanding man, end quote.
|
278 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0050.wav|Palmer, who was only thirty-one at the time of his trial, was in appearance short and stout, with a round head
|
279 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0104.wav|Whaley picked Oswald.
|
280 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0055.wav|High authorities were in favor of continuous separation.
|
281 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0030.wav|The brutal ferocity of the wild beast once aroused, the same means, the same weapons were employed to do the dreadful deed,
|
282 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ038-0047.wav|Some of the officers saw Oswald strike McDonald with his fist. Most of them heard a click which they assumed to be a click of the hammer of the revolver.
|
283 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0074.wav|Let us pass on.
|
284 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0069.wav|Efforts made by the Bureau since the assassination, on the other hand,
|
285 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0211.wav|They were never left quite alone for fear of suicide, and for the same reason they were searched for weapons or poisons.
|
286 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0053.wav|It is the conclusion of the Commission that, even in the absence of Secret Service criteria
|
287 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0093.wav|Frazier estimated that the bag was two feet long, quote, give and take a few inches, end quote, and about five or six inches wide.
|
288 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0149.wav|The turnkeys left the prisoners very much to themselves, never entering the wards after locking-up time, at dusk, till unlocking next morning,
|
289 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0211.wav|The false coin was bought by an agent from an agent, and dealings were carried on secretly at the "Clock House" in Seven Dials.
|
290 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0054.wav|This contrivance appears to have been copied with improvements from that which had been used in Dublin at a still earlier date,
|
291 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0052.wav|that his commitment to Marxism was an important factor influencing his conduct during his adult years.
|
292 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0023.wav|Two weeks pass, and at last you stand on the eastern edge of the plateau
|
293 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0184.wav|Lord Ferrers' body was brought to Surgeons' Hall after execution in his own carriage and six;
|
294 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0252.wav|A committee was appointed, under the presidency of the Duke of Richmond
|
295 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0266.wav|has probably no parallel in the annals of crime. Saward himself is a striking and in some respects an unique figure in criminal history.
|
296 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0059.wav|even after sentence, and until within a few hours of execution, he was buoyed up with the hope of reprieve.
|
297 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0034.wav|What do they mean by the words "packing the Court"?
|
298 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0089.wav|He was engaged in whitewashing and cleaning; the officer who had him in charge left him on the stairs leading to the gallery.
|
299 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0227.wav|with two hits, within four point eight and five point six seconds.
|
300 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0096.wav|have now come into general use and are obviously a great improvement on the ordinary "modern style" in use in England, which is in fact the Bodoni type
|
301 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0129.wav|who threatened to betray the theft. But Brewer, either before or after this, succumbed to temptation,
|
302 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0157.wav|and that, as he was starving, he had resolved on this desperate deed,
|
303 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ038-0264.wav|He concluded that, quote, the general rifling characteristics of the rifle are of the same type as those found on the bullet
|
304 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0165.wav|When security arrangements at the airport were complete, the Secret Service made the necessary arrangements for the Vice President to leave the hospital.
|
305 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0244.wav|The effect of establishing the forgeries would be to restore to the Roupell family lands for which a price had already been paid
|
306 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0071.wav|in the face of impediments confessedly discouraging
|
307 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0340.wav|Such of the Babylonians as witnessed the treachery took refuge in the temple of Jupiter Belus;
|
308 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0164.wav|with the idea of subjecting her to the irritant poison slowly but surely until the desired effect, death, was achieved.
|
309 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0197.wav|I then told the officers that their primary duty was traffic and crowd control and that they should be alert for any persons who might attempt to throw anything
|
310 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0098.wav|Mr. Oxenford having denied that he had made any transfer of stock, the matter was at once put into the hands of the police.
|
311 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0049.wav|led him to think seriously of trying his fortunes in another land.
|
312 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0014.wav|quote, that the crowd was about the same as the one which came to see him before but there were one hundred thousand extra people on hand who came to see Mrs. Kennedy.
|
313 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0186.wav|A milliner's porter,
|
314 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0027.wav|Yet even so early as the death of the first Sir John Paul,
|
315 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0049.wav|Marina Oswald, however, recalled that her husband was upset by this interview.
|
316 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0021.wav|at fourteen he was a pickpocket and a "duffer," or a seller of sham goods.
|
317 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0140.wav|otherwise he would have been stripped of his clothes. End quote.
|
318 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0130.wav|Shortly thereafter, less than eighteen months after his defection, about six weeks before he met Marina Prusakova,
|
319 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0180.wav|His letter to the Corporation, under date fourth June,
|
320 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0108.wav|He was struck with the appearance of the corpse, which was not emaciated, as after a long disease ending in death;
|
321 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0268.wav|Women saw men if they merely pretended to be wives; even boys were visited by their sweethearts.
|
322 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0125.wav|of residence in the U.S.S.R. against any cause which I join, by association,
|
323 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0231.wav|It was Tester's business, who had access to the railway company's books, to watch for this.
|
324 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0225.wav|The rentals of rooms and fees went to the warden, whose income was two thousand three hundred seventy-two pounds.
|
325 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0072.wav|The employees raced the elevators to the first floor. Givens saw Oswald standing at the gate on the fifth floor as the elevator went by.
|
326 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0033.wav|He began to treat me better. He helped me more -- although he always did help. But he was more attentive, end quote.
|
327 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0058.wav|to infuse blood and fluids into the circulatory system.
|
328 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0197.wav|During November the Dallas papers reported frequently on the plans for protecting the President, stressing the thoroughness of the preparations.
|
329 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0047.wav|Oswald and his family lived for a brief period with his mother at her urging, but Oswald soon decided to move out.
|
330 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0026.wav|seems necessary to produce the same result of justice and right conduct
|
331 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0230.wav|The prison allowances were eked out by the broken victuals generously given by several eating-house keepers in the city,
|
332 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0252.wav|Ted Callaway, who saw the gunman moments after the shooting, testified that Commission Exhibit Number one sixty-two
|
333 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0008.wav|Meanwhile, Chief Curry ordered the police base station to notify Parkland Hospital that the wounded President was en route.
|
334 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0021.wav|all one had to do was get a high building someday with a telescopic rifle, and there was nothing anybody could do to defend against such an attempt.
|
335 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0179.wav|being reviewed regularly.
|
336 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ025-0118.wav|and that, however diverse may be the fabrics or tissues of which their bodies are composed, all these varied structures result
|
337 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0278.wav|Zopyrus, when they told him, not thinking that it could be true, went and saw the colt with his own eyes;
|
338 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0090.wav|Not only did their presence tend greatly to interfere with the discipline of the prison, but their condition was deplorable in the extreme.
|
339 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0045.wav|that she would be able to leave the Soviet Union. Marina Oswald has denied this.
|
340 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0289.wav|For he cut off his own nose and ears, and then, clipping his hair close and flogging himself with a scourge,
|
341 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0276.wav|Calcraft, the moment he had adjusted the cap and rope, ran down the steps, drew the bolt, and disappeared.
|
342 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0122.wav|treated the gunshot wound in the left thigh.
|
343 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0205.wav|he received a retaining fee of five pounds, five shillings, with the usual guinea for each job;
|
344 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0248.wav|leading to an inequality, uncertainty, and inefficiency of punishment productive of the most prejudicial results.
|
345 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0183.wav|it was not surprising that the replica sack made on December one, nineteen sixty-three,
|
346 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0001.wav|Report of the President's Commission on the Assassination of President Kennedy. The Warren Commission Report. By The President's Commission on the Assassination of President Kennedy.
|
347 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0218.wav|In eighteen fifty-five
|
348 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0102.wav|Here and there a book is printed in France or Germany with some pretension to good taste,
|
349 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0125.wav|It was diverted from its proper uses, and, as the "place of the greatest comfort," was allotted to persons who should not have been sent to Newgate at all.
|
350 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0022.wav|A formal and thorough description of the responsibilities of the advance agent is now in preparation by the Service.
|
351 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0212.wav|On the night of the eleventh day Gobrias killed the son of the King.
|
352 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0357.wav|yet we may be sure that Babylon was taken by Darius only by use of stratagem. Its walls were impregnable.
|
353 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0199.wav|there was no case to make out; why waste money on lawyers for the defense? His demeanor was cool and collected throughout;
|
354 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0077.wav|A man named Lears, under sentence of transportation for an attempt at murder on board ship, got up part of the way,
|
355 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0194.wav|and that executors or persons having lawful possession of the bodies
|
356 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0094.wav|Discovery of the murder came in this wise. O'Connor, a punctual and well-conducted official, was at once missed at the London Docks.
|
357 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0079.wav|Caslon's type is clear and neat, and fairly well designed;
|
358 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0052.wav|In the nutrition of the animal the most essential and characteristic part of the food supply is derived from vegetable
|
359 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0005.wav|One of the earliest of the big operators in fraudulent finance was Edward Beaumont Smith,
|
360 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0072.wav|I then stepped off of it and the officer picked it up in the middle and it bent so.
|
361 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0067.wav|According to McWatters, the Beckley bus was behind the Marsalis bus, but he did not actually see it.
|
362 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ025-0098.wav|and it is probable that amyloid substances are universally present in the animal organism, though not in the precise form of starch.
|
363 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0257.wav|during which time a host of witnesses were examined, and the committee presented three separate reports,
|
364 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0024.wav|Thus in eighteen thirteen the exaction of jail fees had been forbidden by law,
|
365 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0154.wav|In eighteen ninety-four,
|
366 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0059.wav|(three) his experience and practice after leaving the Marine Corps, and (four) the accuracy of the weapon and the quality of the ammunition.
|
367 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0150.wav|He is allowed intercourse with prostitutes who, in nine cases out of ten, have originally conduced to his ruin;
|
368 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0001.wav|Chronicles of Newgate, Volume two. By Arthur Griffiths. Section eighteen: Newgate notorieties continued, part three.
|
369 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0158.wav|feeling, as he said, that he might as well be shot or hanged as remain in such a state.
|
370 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0281.wav|who had borne the Queen's commission, first as cornet, and then lieutenant, in the tenth Hussars.
|
371 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0055.wav|and he could disassemble it more rapidly.
|
372 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0218.wav|A new accomplice was now needed within the company's establishment, and Pierce looked about long before he found the right person.
|
373 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ027-0006.wav|In all these lines the facts are drawn together by a strong thread of unity.
|
374 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0049.wav|He had here completed his ascent.
|
375 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0088.wav|It was not likely that a system which left innocent men -- for the great bulk of new arrivals were still untried
|
376 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0133.wav|a great change must have occurred in Oswald's thinking to induce him to return to the United States.
|
377 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0234.wav|While he did become enraged at at least one point in his interrogation,
|
378 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0033.wav|The adequacy of existing procedures can fairly be assessed only after full consideration of the difficulty of the protective assignment,
|
379 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0061.wav|and having, quote, somewhat bushy, end quote, hair.
|
380 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0025.wav|the officers of Klein's discovered that a rifle bearing serial number C two seven six six had been shipped to one A. Hidell,
|
381 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0197.wav|in view of all the information concerning Oswald in its files, should have alerted the Secret Service to Oswald's presence in Dallas
|
382 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0130.wav|and stole paper on a much larger scale than Brown.
|
383 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0265.wav|It was recommended that the dietaries should be submitted and approved like the rules; that convicted prisoners should not receive any food but the jail allowance;
|
384 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0105.wav|He presented Arnold Johnson, Gus Hall,
|
385 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0043.wav|This went on for some time, and might never have been discovered had some good stroke of luck provided any of the partners
|
386 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ030-0125.wav|On several occasions when the Vice President's car was slowed down by the throng, Special Agent Youngblood stepped out to hold the crowd back.
|
387 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0140.wav|He also studied Dallas bus schedules to prepare for his later use of buses to travel to and from General Walker's house.
|
388 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0220.wav|In consequence of these disclosures, both Bambridge and Huggin, his predecessor in the office, were committed to Newgate,
|
389 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0117.wav|At one:twenty-nine p.m. the police radio reported
|
390 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0276.wav|The first plot was against Mr. Harry Emmanuel, but he escaped, and the attempt was made upon Loudon and Ryder.
|
391 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0077.wav|nor has he a right to poison or starve his fellow-creatures."
|
392 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0194.wav|they should not be confused with slowness, indecision or fear. Only the intellectually fearless could even be remotely attracted to our doctrine,
|
393 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0114.wav|The route chosen from the airport to Main Street was the normal one, except where Harwood Street was selected as the means of access to Main Street
|
394 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0194.wav|The policemen were now in possession;
|
395 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0027.wav|According to its microfilm records, Klein's received an order for a rifle on March thirteen, nineteen sixty-three,
|
396 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0289.wav|However, there is no evidence that these men failed to take any action in Dallas within their power that would have averted the tragedy.
|
397 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0188.wav|that he was the leader of a fascist organization, and when I said that even though all of that might be true, just the same he had no right to take his life,
|
398 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0118.wav|In eighteen twenty-nine the gallows claimed two more victims for this offense.
|
399 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0201.wav|After her interview with Mrs. Oswald,
|
400 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0056.wav|While the rifle may have already been disassembled when Oswald arrived home on Thursday, he had ample time that evening to disassemble the rifle
|
401 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0073.wav|Hosty considered the information to be, quote, stale, unquote, by that time, and did not attempt to verify Oswald's reported statement.
|
402 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0153.wav|only nominally so, however, in many cases, since when he uses a headline he counts that in,
|
403 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0158.wav|or any kind of moral improvement was impossible; the prisoner's career was inevitably downward, till he struck the lowest depths.
|
404 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0502.wav|The Ishtar gateway leading to the palace was encased with beautiful blue glazed bricks,
|
405 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0226.wav|Though Herodotus wrote nearly a hundred years after Babylon fell, his story seems to bear the stamp of truth.
|
406 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0038.wav|as there had been before; as in the year eighteen forty-nine, a year memorable for the Rush murders at Norwich,
|
407 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0241.wav|But in the interval very comprehensive and, I think it must be admitted, salutary changes were successively introduced into the management of prisons.
|
408 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0094.wav|were induced to cut punches for a series of "old style" letters.
|
409 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0015.wav|the forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves.
|
410 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0015.wav|From defection to return to Fort Worth.
|
411 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0139.wav|since there was no background to the New Orleans FPCC, quote, organization, end quote, which consisted solely of Oswald.
|
412 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0031.wav|that the Secret Service consciously set about the task of inculcating and maintaining the highest standard of excellence and esprit, for all of its personnel.
|
413 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0235.wav|It has also used other Federal law enforcement agents during Presidential visits to cities in which such agents are stationed.
|
414 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0137.wav|FBI, and the Secret Service.
|
415 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0109.wav|At one:thirty-five p.m., after Governor Connally had been moved to the operating room, Dr. Shaw started the first operation
|
416 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0041.wav|He noted that the President was blue-white or ashen in color; had slow, spasmodic, agonal respiration without any coordination;
|
417 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0139.wav|There should be at least a full and fair trial given to these means of ending industrial warfare;
|
418 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0004.wav|The narrative of these events is based largely on the recollections of the participants,
|
419 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0122.wav|It was said in last year's Democratic platform,
|
420 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0264.wav|inspectors of prisons should be appointed, who should visit all the prisons from time to time and report to the Secretary of State.
|
421 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0105.wav|and beyond it was a room called the "wine room," because formerly used for the sale of wine, but
|
422 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0035.wav|in the interests and for the due protection of the public, that the fullest and fairest inquiry should be made,
|
423 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0252.wav|Three of these agents occupied positions on the running boards of the car, and the fourth was seated in the car.
|
424 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0109.wav|The proceeds of the robbery were lodged in a Boston bank,
|
425 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0139.wav|Oswald obtained a hunting license, joined a hunting club and went hunting about six times, as discussed more fully in chapter six.
|
426 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0047.wav|that anyone ever attacked any street demonstration in which Oswald was involved, except for the Bringuier incident mentioned above,
|
427 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0417.wav|Catherine Wilson, the poisoner, was reserved and reticent to the last, expressing no contrition, but also no fear --
|
428 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0178.wav|he left his wedding ring in a cup on the dresser in his room. He also left one hundred seventy dollars in a wallet in one of the dresser drawers.
|
429 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0172.wav|While in London, for instance, in eighteen twenty-nine, twenty-four persons had been executed for crimes other than murder,
|
430 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0202.wav|incident to its responsibilities.
|
431 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0103.wav|The name "Hidell" was stamped on some of the "Chapter's" printed literature and on the membership application blanks.
|
432 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0091.wav|and Elder had to be assisted by two bank porters, who carried it for him to a carriage waiting near the Mansion House.
|
433 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0208.wav|nineteen dollars, ninety-five cents, plus one dollar, twenty-seven cents shipping charge, had been collected from the consignee, Hidell.
|
434 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0128.wav|her hair was dressed in long crepe bands. She had lace ruffles at her wrist, and wore primrose-colored kid gloves.
|
435 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0007.wav|This affected Cole's credit, and ugly reports were in circulation charging him with the issue of simulated warrants.
|
436 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0169.wav|he would have reached his destination at approximately twelve:fifty-four p.m.
|
437 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0040.wav|The second step we have taken in the restoration of normal business enterprise
|
438 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0036.wav|The bank was already insolvent,
|
439 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0041.wav|Although Bureau experiments had shown that twenty-four hours was a likely maximum time, Latona stated
|
440 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0192.wav|The dissection of executed criminals was abolished soon after the discovery of the crime of burking,
|
441 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0248.wav|The eyewitnesses vary in their identification of the jacket.
|
442 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0289.wav|As each transaction was carried out from a different address, and a different messenger always employed,
|
443 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0072.wav|After a few years of active exertion the Society was rewarded by fresh legislation.
|
444 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ023-0047.wav|The three horses are, of course, the three branches of government -- the Congress, the Executive and the courts.
|
445 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0126.wav|Hardly any one.
|
446 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0097.wav|The window was approximately one hundred twenty feet away.
|
447 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0462.wav|They were laid in bitumen.
|
448 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0055.wav|It is now possible for Presidents to travel the length and breadth of a land far larger than the United States
|
449 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0371.wav|Yet the law was seldom if ever enforced.
|
450 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0207.wav|Although all of the shots were a few inches high and to the right of the target,
|
451 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0174.wav|Mr. Buxton's friends at once paid the forty shillings, and the boy was released.
|
452 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0233.wav|In his own profession
|
453 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0108.wav|It is clear that there are upward and downward currents of water containing food (comparable to blood of an animal),
|
454 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ038-0035.wav|Oswald rose from his seat, bringing up both hands.
|
455 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0148.wav|water which is lost by evaporation, especially from the leaf surface through the stomata;
|
456 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0186.wav|the position of our Society that a work of utility might be also a work of art, if we cared to make it so.
|
457 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0264.wav|The upturned faces of the eager spectators resembled those of the 'gods' at Drury Lane on Boxing Night;
|
458 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0041.wav|The occupants of this terrible black pew were the last always to enter the chapel.
|
459 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0297.wav|But there were other notorious cases of forgery.
|
460 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0018.wav|the Commission is not able to reach any definite conclusions as to whether or not he was, quote, sane, unquote, under prevailing legal standards.
|
461 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0253.wav|"to inquire into and report upon the several jails and houses of correction in the counties, cities, and corporate towns within England and Wales
|
462 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ027-0176.wav|Fishes first appeared in the Devonian and Upper Silurian in very reptilian or rather amphibian forms.
|
463 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0035.wav|The position of this palmprint on the carton was parallel with the long axis of the box, and at right angles with the short axis;
|
464 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0054.wav|But he did not like the risk of entering a room by the fireplace, and the chances of detection it offered.
|
465 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0262.wav|Roupell received the announcement with a cheerful countenance,
|
466 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0237.wav|with thirteen dollars, eighty-seven cents when considerably greater resources were available to him.
|
467 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0166.wav|Two other witnesses were able to offer partial descriptions of a man they saw in the southeast corner window
|
468 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0238.wav|"just to steady their legs a little;" in other words, to add his weight to that of the hanging bodies.
|
469 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0198.wav|The discussion above has already set forth examples of his expression of hatred for the United States.
|
470 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0189.wav|At two:thirty-eight p.m., Eastern Standard Time, Lyndon Baines Johnson took the oath of office as the thirty-sixth President of the United States.
|
471 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0084.wav|or, quote, other high government officials in the nature of a complaint coupled with an expressed or implied determination to use a means,
|
472 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0158.wav|As for my return entrance visa please consider it separately. End quote.
|
473 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0082.wav|it appears that Marina Oswald also complained that her husband was not able to provide more material things for her.
|
474 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0190.wav|appeared in The Dallas Times Herald on November fifteen, nineteen sixty-three.
|
475 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0155.wav|The only exit from the office in the direction Oswald was moving was through the door to the front stairway.
|
476 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ044-0004.wav|Political Activities
|
477 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0016.wav|The Commission has not undertaken a comprehensive examination of all facets of this subject;
|
478 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0368.wav|The latter too was to be laid before the House of Commons.
|
479 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ010-0062.wav|But they proceeded in all seriousness, and would have shrunk from no outrage or atrocity in furtherance of their foolhardy enterprise.
|
480 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0159.wav|It was from Oswald's right hand, in which he carried the long package as he walked from Frazier's car to the building.
|
481 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0171.wav|The boy declared he saw no one, and accordingly passed through without paying the toll of a penny.
|
482 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0298.wav|in his evidence in eighteen fourteen, said it was more,
|
483 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0219.wav|and in one corner, at some depth, a bundle of clothes were unearthed, which, with a hairy cap,
|
484 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0190.wav|After this came the charge of administering oil of vitriol, which failed, as has been described.
|
485 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0179.wav|This, with a scheme for limiting the jail to untried prisoners, had been urgently recommended by Lord John Russell in eighteen thirty.
|
486 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0188.wav|each patrolman might be given a prepared booklet of instructions explaining what is expected of him. The Secret Service has expressed concern
|
487 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0043.wav|The disgraceful overcrowding had been partially ended, but the same evils of indiscriminate association were still present; there was the old neglect of decency,
|
488 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0060.wav|A number of people who resembled some of those in the photographs were placed under surveillance at the Trade Mart.
|
489 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0052.wav|Both systems came to us from the United States. The difference was really more in degree than in principle,
|
490 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0081.wav|Later in the day each woman found an empty shell on the ground near the house. These two shells were delivered to the police.
|
491 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0200.wav|paying particular attention to the crowd for any unusual activity.
|
492 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0426.wav|come along, gallows.
|
493 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0182.wav|A tremendous crowd assembled when Bellingham was executed in eighteen twelve for the murder of Spencer Percival, at that time prime minister;
|
494 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0107.wav|Upon moving to New Orleans on April twenty-four, nineteen sixty-three,
|
495 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ006-0084.wav|and so numerous were his opportunities of showing favoritism, that all the prisoners may be said to be in his power.
|
496 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ025-0081.wav|has no permanent digestive cavity or mouth, but takes in its food anywhere and digests, so to speak, all over its body.
|
497 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0042.wav|These were either satisfied with a makeshift, and modified existing buildings, without close regard to their suitability, or for a long time did nothing at all.
|
498 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0240.wav|They agree that Hosty told Revill
|
499 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ032-0012.wav|the resistance to arrest and the attempted shooting of another police officer by the man (Lee Harvey Oswald) subsequently accused of assassinating President Kennedy
|
500 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0209.wav|The assistant to the Director of the FBI testified that
|
filelists/ljs_audio_text_train_filelist.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
filelists/ljs_audio_text_val_filelist.txt
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ022-0023.wav|The overwhelming majority of people in this country know how to sift the wheat from the chaff in what they hear and what they read.
|
2 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0030.wav|If somebody did that to me, a lousy trick like that, to take my wife away, and all the furniture, I would be mad as hell, too.
|
3 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0201.wav|as is shown by the report of the Commissioners to inquire into the state of the municipal corporations in eighteen thirty-five.
|
4 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ001-0110.wav|Even the Caslon type when enlarged shows great shortcomings in this respect:
|
5 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0345.wav|All the committee could do in this respect was to throw the responsibility on others.
|
6 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ007-0154.wav|These pungent and well-grounded strictures applied with still greater force to the unconvicted prisoner, the man who came to the prison innocent, and still uncontaminated,
|
7 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0098.wav|and recognized as one of the frequenters of the bogus law-stationers. His arrest led to that of others.
|
8 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0044.wav|Oswald was, however, willing to discuss his contacts with Soviet authorities. He denied having any involvement with Soviet intelligence agencies
|
9 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0038.wav|The first physician to see the President at Parkland Hospital was Dr. Charles J. Carrico, a resident in general surgery.
|
10 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0194.wav|during the morning of November twenty-two prior to the motorcade.
|
11 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0026.wav|On occasion the Secret Service has been permitted to have an agent riding in the passenger compartment with the President.
|
12 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0152.wav|although at Mr. Buxton's visit a new jail was in process of erection, the first step towards reform since Howard's visitation in seventeen seventy-four.
|
13 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0278.wav|or theirs might be one of many, and it might be considered necessary to "make an example."
|
14 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ043-0002.wav|The Warren Commission Report. By The President's Commission on the Assassination of President Kennedy. Chapter seven. Lee Harvey Oswald:
|
15 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0114.wav|Mr. Wakefield winds up his graphic but somewhat sensational account by describing another religious service, which may appropriately be inserted here.
|
16 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0506.wav|A modern artist would have difficulty in doing such accurate work.
|
17 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ050-0168.wav|with the particular purposes of the agency involved. The Commission recognizes that this is a controversial area
|
18 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0223.wav|Oswald's Marine training in marksmanship, his other rifle experience and his established familiarity with this particular weapon
|
19 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0032.wav|According to O'Donnell, quote, we had a motorcade wherever we went, end quote.
|
20 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0070.wav|Dr. Clark, who most closely observed the head wound,
|
21 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0198.wav|Euins, who was on the southwest corner of Elm and Houston Streets testified that he could not describe the man he saw in the window.
|
22 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ026-0068.wav|Energy enters the plant, to a small extent,
|
23 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ039-0075.wav|once you know that you must put the crosshairs on the target and that is all that is necessary.
|
24 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0096.wav|the fatal consequences whereof might be prevented if the justices of the peace were duly authorized
|
25 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0014.wav|Speaking on a debate on prison matters, he declared that
|
26 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0161.wav|he was reported to have fallen away to a shadow.
|
27 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0239.wav|His disappearance gave color and substance to evil reports already in circulation that the will and conveyance above referred to
|
28 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0257.wav|Here the tread-wheel was in use, there cellular cranks, or hard-labor machines.
|
29 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0008.wav|you tap gently with your heel upon the shoulder of the dromedary to urge her on.
|
30 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ024-0083.wav|This plan of mine is no attack on the Court;
|
31 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0129.wav|No night clubs or bowling alleys, no places of recreation except the trade union dances. I have had enough.
|
32 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0103.wav|The police asked him whether he could pick out his passenger from the lineup.
|
33 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0058.wav|During his Presidency, Franklin D. Roosevelt made almost four hundred journeys and traveled more than three hundred fifty thousand miles.
|
34 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0076.wav|He was seen afterwards smoking and talking with his hosts in their back parlor, and never seen again alive.
|
35 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0043.wav|long narrow rooms -- one thirty-six feet, six twenty-three feet, and the eighth eighteen,
|
36 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0076.wav|We come to the sermon.
|
37 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0131.wav|even when the high sheriff had told him there was no possibility of a reprieve, and within a few hours of execution.
|
38 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0184.wav|but there is a system for the immediate notification of the Secret Service by the confining institution when a subject is released or escapes.
|
39 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0263.wav|When other pleasures palled he took a theatre, and posed as a munificent patron of the dramatic art.
|
40 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ042-0096.wav|(old exchange rate) in addition to his factory salary of approximately equal amount
|
41 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ049-0050.wav|Hill had both feet on the car and was climbing aboard to assist President and Mrs. Kennedy.
|
42 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0186.wav|seeing that since the establishment of the Central Criminal Court, Newgate received prisoners for trial from several counties,
|
43 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0307.wav|then let twenty days pass, and at the end of that time station near the Chaldasan gates a body of four thousand.
|
44 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0235.wav|While they were in a state of insensibility the murder was committed.
|
45 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0053.wav|reached the same conclusion as Latona that the prints found on the cartons were those of Lee Harvey Oswald.
|
46 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0030.wav|These were damnatory facts which well supported the prosecution.
|
47 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ015-0203.wav|but were the precautions too minute, the vigilance too close to be eluded or overcome?
|
48 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0093.wav|but his scribe wrote it in the manner customary for the scribes of those days to write of their royal masters.
|
49 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ002-0018.wav|The inadequacy of the jail was noticed and reported upon again and again by the grand juries of the city of London,
|
50 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0275.wav|At last, in the twentieth month,
|
51 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0042.wav|which he kept concealed in a hiding-place with a trap-door just under his bed.
|
52 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ011-0096.wav|He married a lady also belonging to the Society of Friends, who brought him a large fortune, which, and his own money, he put into a city firm,
|
53 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0077.wav|Roger D. Craig, a deputy sheriff of Dallas County,
|
54 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0318.wav|Other officials, great lawyers, governors of prisons, and chaplains supported this view.
|
55 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ013-0164.wav|who came from his room ready dressed, a suspicious circumstance, as he was always late in the morning.
|
56 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ027-0141.wav|is closely reproduced in the life-history of existing deer. Or, in other words,
|
57 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0335.wav|accordingly they committed to him the command of their whole army, and put the keys of their city into his hands.
|
58 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0202.wav|Mrs. Kennedy chose the hospital in Bethesda for the autopsy because the President had served in the Navy.
|
59 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0145.wav|From those willing to join in establishing this hoped-for period of peace,
|
60 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0288.wav|"Müller, Müller, He's the man," till a diversion was created by the appearance of the gallows, which was received with continuous yells.
|
61 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ028-0081.wav|Years later, when the archaeologists could readily distinguish the false from the true,
|
62 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ018-0081.wav|his defense being that he had intended to commit suicide, but that, on the appearance of this officer who had wronged him,
|
63 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ021-0066.wav|together with a great increase in the payrolls, there has come a substantial rise in the total of industrial profits
|
64 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ009-0238.wav|After this the sheriffs sent for another rope, but the spectators interfered, and the man was carried back to jail.
|
65 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ005-0079.wav|and improve the morals of the prisoners, and shall insure the proper measure of punishment to convicted offenders.
|
66 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0019.wav|drove to the northwest corner of Elm and Houston, and parked approximately ten feet from the traffic signal.
|
67 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ036-0174.wav|This is the approximate time he entered the roominghouse, according to Earlene Roberts, the housekeeper there.
|
68 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ046-0146.wav|The criteria in effect prior to November twenty-two, nineteen sixty-three, for determining whether to accept material for the PRS general files
|
69 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0044.wav|and the deepest anxiety was felt that the crime, if crime there had been, should be brought home to its perpetrator.
|
70 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ017-0070.wav|but his sporting operations did not prosper, and he became a needy man, always driven to desperate straits for cash.
|
71 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0020.wav|He was soon afterwards arrested on suspicion, and a search of his lodgings brought to light several garments saturated with blood;
|
72 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0020.wav|He never reached the cistern, but fell back into the yard, injuring his legs severely.
|
73 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0230.wav|when he was finally apprehended in the Texas Theatre. Although it is not fully corroborated by others who were present,
|
74 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ035-0129.wav|and she must have run down the stairs ahead of Oswald and would probably have seen or heard him.
|
75 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0307.wav|afterwards express a wish to murder the Recorder for having kept them so long in suspense.
|
76 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0294.wav|nearly indefinitely deferred.
|
77 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ047-0148.wav|On October twenty-five,
|
78 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0111.wav|They entered a "stone cold room," and were presently joined by the prisoner.
|
79 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0042.wav|that he could only testify with certainty that the print was less than three days old.
|
80 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ037-0234.wav|Mrs. Mary Brock, the wife of a mechanic who worked at the station, was there at the time and she saw a white male,
|
81 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0002.wav|Chapter seven. Lee Harvey Oswald: Background and Possible Motives, Part one.
|
82 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ045-0140.wav|The arguments he used to justify his use of the alias suggest that Oswald may have come to think that the whole world was becoming involved
|
83 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0035.wav|the number and names on watches, were carefully removed or obliterated after the goods passed out of his hands.
|
84 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ012-0250.wav|On the seventh July, eighteen thirty-seven,
|
85 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0179.wav|contracted with sheriffs and conveners to work by the job.
|
86 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ016-0138.wav|at a distance from the prison.
|
87 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ027-0052.wav|These principles of homology are essential to a correct interpretation of the facts of morphology.
|
88 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ031-0134.wav|On one occasion Mrs. Johnson, accompanied by two Secret Service agents, left the room to see Mrs. Kennedy and Mrs. Connally.
|
89 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ019-0273.wav|which Sir Joshua Jebb told the committee he considered the proper elements of penal discipline.
|
90 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0110.wav|At the first the boxes were impounded, opened, and found to contain many of O'Connor's effects.
|
91 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ034-0160.wav|on Brennan's subsequent certain identification of Lee Harvey Oswald as the man he saw fire the rifle.
|
92 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ038-0199.wav|eleven. If I am alive and taken prisoner,
|
93 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ014-0010.wav|yet he could not overcome the strange fascination it had for him, and remained by the side of the corpse till the stretcher came.
|
94 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ033-0047.wav|I noticed when I went out that the light was on, end quote,
|
95 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ040-0027.wav|He was never satisfied with anything.
|
96 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ048-0228.wav|and others who were present say that no agent was inebriated or acted improperly.
|
97 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ003-0111.wav|He was in consequence put out of the protection of their internal law, end quote. Their code was a subject of some curiosity.
|
98 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ008-0258.wav|Let me retrace my steps, and speak more in detail of the treatment of the condemned in those bloodthirsty and brutally indifferent days,
|
99 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ029-0022.wav|The original plan called for the President to spend only one day in the State, making whirlwind visits to Dallas, Fort Worth, San Antonio, and Houston.
|
100 |
+
/homedtic/apeiro/LJSpeech-1.1/wavs/LJ004-0045.wav|Mr. Sturges Bourne, Sir James Mackintosh, Sir James Scarlett, and William Wilberforce.
|
fp16_optimizer.py
ADDED
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from torch.autograd import Variable
|
4 |
+
from torch.nn.parameter import Parameter
|
5 |
+
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
|
6 |
+
|
7 |
+
from loss_scaler import DynamicLossScaler, LossScaler
|
8 |
+
|
9 |
+
# Tensor classes that fp32_to_fp16 recognises as single precision and casts with .half().
FLOAT_TYPES = (torch.FloatTensor, torch.cuda.FloatTensor)
|
10 |
+
# Tensor classes that fp16_to_fp32 recognises as half precision and casts with .float().
HALF_TYPES = (torch.HalfTensor, torch.cuda.HalfTensor)
|
11 |
+
|
12 |
+
|
13 |
+
def conversion_helper(val, conversion):
    """Map `conversion` over every leaf of a (possibly nested) tuple/list.

    Args:
        val: A single value, or a tuple/list (nested to any depth) of values.
        conversion: Callable applied to each leaf, i.e. to every item that is
            neither a tuple nor a list.

    Returns:
        `conversion(val)` when `val` is a leaf.  Otherwise a container that
        mirrors `val` -- a tuple for tuple input, a list for list input --
        holding the converted items.
    """
    if isinstance(val, tuple):
        return tuple(conversion_helper(item, conversion) for item in val)
    if isinstance(val, list):
        return [conversion_helper(item, conversion) for item in val]
    return conversion(val)
|
21 |
+
|
22 |
+
|
23 |
+
def fp32_to_fp16(val):
    """Cast every fp32 tensor found in `val` to fp16.

    `val` may be a single value or a nested tuple/list structure; the
    traversal is delegated to `conversion_helper`.  Values that are not
    instances of FLOAT_TYPES are returned unchanged.
    """
    def half_conversion(item):
        # Parameters/Variables are type-checked through their underlying
        # `.data` tensor, but the cast is applied to the wrapper itself.
        probe = item.data if isinstance(item, (Parameter, Variable)) else item
        return item.half() if isinstance(probe, FLOAT_TYPES) else item

    return conversion_helper(val, half_conversion)
|
33 |
+
|
34 |
+
|
35 |
+
def fp16_to_fp32(val):
    """Cast every fp16 tensor found in `val` to fp32.

    `val` may be a single value or a nested tuple/list structure; the
    traversal is delegated to `conversion_helper`.  Values that are not
    instances of HALF_TYPES are returned unchanged.
    """
    def float_conversion(item):
        # Parameters/Variables are type-checked through their underlying
        # `.data` tensor, but the cast is applied to the wrapper itself.
        probe = item.data if isinstance(item, (Parameter, Variable)) else item
        return item.float() if isinstance(probe, HALF_TYPES) else item

    return conversion_helper(val, float_conversion)
|
45 |
+
|
46 |
+
|
47 |
+
class FP16_Module(nn.Module):
    """Run a wrapped module in half precision behind an fp32 interface.

    The wrapped module is converted with `.half()` and registered as the
    child named ``module``.
    """

    def __init__(self, module):
        super().__init__()
        self.add_module('module', module.half())

    def forward(self, *inputs, **kwargs):
        """Cast positional inputs to fp16, call the module, return fp32.

        Only the positional `inputs` go through `fp32_to_fp16`; keyword
        arguments are forwarded to the wrapped module as given.
        """
        half_inputs = fp32_to_fp16(inputs)
        half_outputs = self.module(*half_inputs, **kwargs)
        return fp16_to_fp32(half_outputs)
|
54 |
+
|
55 |
+
|
56 |
+
class FP16_Optimizer(object):
|
57 |
+
"""
|
58 |
+
FP16_Optimizer is designed to wrap an existing PyTorch optimizer,
|
59 |
+
and enable an fp16 model to be trained using a master copy of fp32 weights.
|
60 |
+
|
61 |
+
Args:
|
62 |
+
optimizer (torch.optim.optimizer): Existing optimizer containing initialized fp16 parameters. Internally, FP16_Optimizer replaces the passed optimizer's fp16 parameters with new fp32 parameters copied from the original ones. FP16_Optimizer also stores references to the original fp16 parameters, and updates these fp16 parameters from the master fp32 copy after each step.
|
63 |
+
static_loss_scale (float, optional, default=1.0): Loss scale used internally to scale fp16 gradients computed by the model. Scaled gradients will be copied to fp32, then downscaled before being applied to the fp32 master params, so static_loss_scale should not affect learning rate.
|
64 |
+
dynamic_loss_scale (bool, optional, default=False): Use dynamic loss scaling. If True, this will override any static_loss_scale option.
|
65 |
+
|
66 |
+
"""
|
67 |
+
|
68 |
+
def __init__(self, optimizer, static_loss_scale=1.0, dynamic_loss_scale=False):
|
69 |
+
if not torch.cuda.is_available:
|
70 |
+
raise SystemError('Cannot use fp16 without CUDA')
|
71 |
+
|
72 |
+
self.fp16_param_groups = []
|
73 |
+
self.fp32_param_groups = []
|
74 |
+
self.fp32_flattened_groups = []
|
75 |
+
for i, param_group in enumerate(optimizer.param_groups):
|
76 |
+
print("FP16_Optimizer processing param group {}:".format(i))
|
77 |
+
fp16_params_this_group = []
|
78 |
+
fp32_params_this_group = []
|
79 |
+
for param in param_group['params']:
|
80 |
+
if param.requires_grad:
|
81 |
+
if param.type() == 'torch.cuda.HalfTensor':
|
82 |
+
print("FP16_Optimizer received torch.cuda.HalfTensor with {}"
|
83 |
+
.format(param.size()))
|
84 |
+
fp16_params_this_group.append(param)
|
85 |
+
elif param.type() == 'torch.cuda.FloatTensor':
|
86 |
+
print("FP16_Optimizer received torch.cuda.FloatTensor with {}"
|
87 |
+
.format(param.size()))
|
88 |
+
fp32_params_this_group.append(param)
|
89 |
+
else:
|
90 |
+
raise TypeError("Wrapped parameters must be either "
|
91 |
+
"torch.cuda.FloatTensor or torch.cuda.HalfTensor. "
|
92 |
+
"Received {}".format(param.type()))
|
93 |
+
|
94 |
+
fp32_flattened_this_group = None
|
95 |
+
if len(fp16_params_this_group) > 0:
|
96 |
+
fp32_flattened_this_group = _flatten_dense_tensors(
|
97 |
+
[param.detach().data.clone().float() for param in fp16_params_this_group])
|
98 |
+
|
99 |
+
fp32_flattened_this_group = Variable(fp32_flattened_this_group, requires_grad = True)
|
100 |
+
|
101 |
+
fp32_flattened_this_group.grad = fp32_flattened_this_group.new(
|
102 |
+
*fp32_flattened_this_group.size())
|
103 |
+
|
104 |
+
# python's lovely list concatenation via +
|
105 |
+
if fp32_flattened_this_group is not None:
|
106 |
+
param_group['params'] = [fp32_flattened_this_group] + fp32_params_this_group
|
107 |
+
else:
|
108 |
+
param_group['params'] = fp32_params_this_group
|
109 |
+
|
110 |
+
self.fp16_param_groups.append(fp16_params_this_group)
|
111 |
+
self.fp32_param_groups.append(fp32_params_this_group)
|
112 |
+
self.fp32_flattened_groups.append(fp32_flattened_this_group)
|
113 |
+
|
114 |
+
# print("self.fp32_flattened_groups = ", self.fp32_flattened_groups)
|
115 |
+
# print("self.fp16_param_groups = ", self.fp16_param_groups)
|
116 |
+
|
117 |
+
self.optimizer = optimizer.__class__(optimizer.param_groups)
|
118 |
+
|
119 |
+
# self.optimizer.load_state_dict(optimizer.state_dict())
|
120 |
+
|
121 |
+
self.param_groups = self.optimizer.param_groups
|
122 |
+
|
123 |
+
if dynamic_loss_scale:
|
124 |
+
self.dynamic_loss_scale = True
|
125 |
+
self.loss_scaler = DynamicLossScaler()
|
126 |
+
else:
|
127 |
+
self.dynamic_loss_scale = False
|
128 |
+
self.loss_scaler = LossScaler(static_loss_scale)
|
129 |
+
|
130 |
+
self.overflow = False
|
131 |
+
self.first_closure_call_this_step = True
|
132 |
+
|
133 |
+
def zero_grad(self):
    """Clear gradients on the wrapped optimizer's params and the fp16 params.

    The wrapped optimizer only knows about the parameters in its own
    param groups, so the fp16 parameters tracked in
    ``self.fp16_param_groups`` are zeroed here by hand.
    """
    self.optimizer.zero_grad()
    for half_group in self.fp16_param_groups:
        for half_param in half_group:
            grad = half_param.grad
            if grad is None:
                continue
            # Detach before zeroing, the same sequence the original code
            # took over from torch.optim's own zero_grad.
            grad.detach_()
            grad.zero_()
|
144 |
+
|
145 |
+
def _check_overflow(self):
|
146 |
+
params = []
|
147 |
+
for group in self.fp16_param_groups:
|
148 |
+
for param in group:
|
149 |
+
params.append(param)
|
150 |
+
for group in self.fp32_param_groups:
|
151 |
+
for param in group:
|
152 |
+
params.append(param)
|
153 |
+
self.overflow = self.loss_scaler.has_overflow(params)
|
154 |
+
|
155 |
+
def _update_scale(self, has_overflow=False):
|
156 |
+
self.loss_scaler.update_scale(has_overflow)
|
157 |
+
|
158 |
+
def _copy_grads_fp16_to_fp32(self):
|
159 |
+
for fp32_group, fp16_group in zip(self.fp32_flattened_groups, self.fp16_param_groups):
|
160 |
+
if len(fp16_group) > 0:
|
161 |
+
# This might incur one more deep copy than is necessary.
|
162 |
+
fp32_group.grad.data.copy_(
|
163 |
+
_flatten_dense_tensors([fp16_param.grad.data for fp16_param in fp16_group]))
|
164 |
+
|
165 |
+
def _downscale_fp32(self):
|
166 |
+
if self.loss_scale != 1.0:
|
167 |
+
for param_group in self.optimizer.param_groups:
|
168 |
+
for param in param_group['params']:
|
169 |
+
param.grad.data.mul_(1./self.loss_scale)
|
170 |
+
|
171 |
+
def clip_fp32_grads(self, clip=-1):
    """Clip the gradient norm of the wrapped optimizer's parameters.

    Args:
        clip: Maximum gradient norm.  Values that are not greater than zero
            disable clipping.

    Returns:
        The value returned by ``torch.nn.utils.clip_grad_norm_`` when
        clipping is performed; None when an overflow is flagged or `clip`
        is not positive.
    """
    if self.overflow:
        return None

    master_params = [
        master_param
        for group in self.optimizer.param_groups
        for master_param in group['params']
    ]
    if not clip > 0:
        return None
    return torch.nn.utils.clip_grad_norm_(master_params, clip)
|
179 |
+
|
180 |
+
def _copy_params_fp32_to_fp16(self):
|
181 |
+
for fp16_group, fp32_group in zip(self.fp16_param_groups, self.fp32_flattened_groups):
|
182 |
+
if len(fp16_group) > 0:
|
183 |
+
for fp16_param, fp32_data in zip(fp16_group, _unflatten_dense_tensors(fp32_group.data, fp16_group)):
|
184 |
+
fp16_param.data.copy_(fp32_data)
|
185 |
+
|
186 |
+
def state_dict(self):
    """Snapshot this FP16_Optimizer as a plain dict.

    The dict holds the loss scaler object, the loss-scaling flags kept on
    this instance, and the wrapped PyTorch optimizer's own state_dict under
    ``'optimizer_state_dict'``.

    Untested.
    """
    return {
        'loss_scaler': self.loss_scaler,
        'dynamic_loss_scale': self.dynamic_loss_scale,
        'overflow': self.overflow,
        'first_closure_call_this_step': self.first_closure_call_this_step,
        'optimizer_state_dict': self.optimizer.state_dict(),
    }
|
201 |
+
|
202 |
+
def load_state_dict(self, state_dict):
    """Restore this instance from a dict produced by `state_dict`.

    The loss scaler and the bookkeeping flags are assigned straight from
    the dict; the ``'optimizer_state_dict'`` entry is handed to the wrapped
    optimizer's own ``load_state_dict``.

    Untested.
    """
    for attribute in ('loss_scaler',
                      'dynamic_loss_scale',
                      'overflow',
                      'first_closure_call_this_step'):
        setattr(self, attribute, state_dict[attribute])
    self.optimizer.load_state_dict(state_dict['optimizer_state_dict'])
|
213 |
+
|
214 |
+
def step(self, closure=None):  # could add clip option.
    """Update the fp32 master params, then refresh the fp16 params from them.

    Without a closure, call this after ``fp16_optimizer_obj.backward(loss)``.
    The wrapped optimizer steps on the fp32 master parameters and the
    result is copied into the fp16 parameters, so another forward pass can
    be run straight away.

    With a closure, no prior ``backward`` call is required, but every
    ``loss.backward()`` inside the closure must be replaced by
    ``fp16_optimizer_obj.backward(loss)``.

    If ``self.overflow`` is set, the step is skipped: a message is printed
    and neither the master nor the fp16 parameters are touched.  The loss
    scaler is updated with the overflow flag in either case.

    Args:
        closure (optional): Closure passed on to the wrapped optimizer.  It
            should call zero_grad on the FP16_Optimizer object, compute the
            loss, call .backward(loss), and return the loss.

    Raises:
        TypeError: If a closure is given while the loss scaler is a
            DynamicLossScaler; the two are currently not compatible.

    Closure example::

        # optimizer is assumed to be an FP16_Optimizer object, previously
        # constructed from an existing pytorch optimizer.
        for input, target in dataset:
            def closure():
                optimizer.zero_grad()
                output = model(input)
                loss = loss_fn(output, target)
                optimizer.backward(loss)
                return loss
            optimizer.step(closure)

    .. note::
        Compared to `ordinary optimizer closures`_, the only differences are
        that "optimizer" is an FP16_Optimizer instance and that
        loss.backward is replaced by optimizer.backward(loss).

    .. _`ordinary optimizer closures`:
        http://pytorch.org/docs/master/optim.html#optimizer-step-closure
    """
    if closure is not None and isinstance(self.loss_scaler, DynamicLossScaler):
        raise TypeError("Using step with a closure is currently not "
                        "compatible with dynamic loss scaling.")

    # Read the scale before the scaler is updated, so that the overflow
    # message reports the value that was actually attempted.
    attempted_scale = self.loss_scaler.loss_scale
    self._update_scale(self.overflow)

    if self.overflow:
        print("OVERFLOW! Skipping step. Attempted loss scale: {}".format(attempted_scale))
        return

    if closure is None:
        self.optimizer.step()
    else:
        self._step_with_closure(closure)

    self._copy_params_fp32_to_fp16()
|
273 |
+
|
274 |
+
def _step_with_closure(self, closure):
|
275 |
+
def wrapped_closure():
|
276 |
+
if self.first_closure_call_this_step:
|
277 |
+
"""
|
278 |
+
We expect that the fp16 params are initially fresh on entering self.step(),
|
279 |
+
so _copy_params_fp32_to_fp16() is unnecessary the first time wrapped_closure()
|
280 |
+
is called within self.optimizer.step().
|
281 |
+
"""
|
282 |
+
self.first_closure_call_this_step = False
|
283 |
+
else:
|
284 |
+
"""
|
285 |
+
If self.optimizer.step() internally calls wrapped_closure more than once,
|
286 |
+
it may update the fp32 params after each call. However, self.optimizer
|
287 |
+
doesn't know about the fp16 params at all. If the fp32 params get updated,
|
288 |
+
we can't rely on self.optimizer to refresh the fp16 params. We need
|
289 |
+
to handle that manually:
|
290 |
+
"""
|
291 |
+
self._copy_params_fp32_to_fp16()
|
292 |
+
|
293 |
+
"""
|
294 |
+
Our API expects the user to give us ownership of the backward() call by
|
295 |
+
replacing all calls to loss.backward() with optimizer.backward(loss).
|
296 |
+
This requirement holds whether or not the call to backward() is made within
|
297 |
+
a closure.
|
298 |
+
If the user is properly calling optimizer.backward(loss) within "closure,"
|
299 |
+
calling closure() here will give the fp32 master params fresh gradients
|
300 |
+
for the optimizer to play with,
|
301 |
+
so all wrapped_closure needs to do is call closure() and return the loss.
|
302 |
+
"""
|
303 |
+
temp_loss = closure()
|
304 |
+
return temp_loss
|
305 |
+
|
306 |
+
self.optimizer.step(wrapped_closure)
|
307 |
+
|
308 |
+
self.first_closure_call_this_step = True
|
309 |
+
|
310 |
+
def backward(self, loss, update_fp32_grads=True):
|
311 |
+
"""
|
312 |
+
fp16_optimizer_obj.backward performs the following conceptual operations:
|
313 |
+
|
314 |
+
fp32_loss = loss.float() (see first Note below)
|
315 |
+
|
316 |
+
scaled_loss = fp32_loss*loss_scale
|
317 |
+
|
318 |
+
scaled_loss.backward(), which accumulates scaled gradients into the .grad attributes of the
|
319 |
+
fp16 model's leaves.
|
320 |
+
|
321 |
+
fp16 grads are then copied to the stored fp32 params' .grad attributes (see second Note).
|
322 |
+
|
323 |
+
Finally, fp32 grads are divided by loss_scale.
|
324 |
+
|
325 |
+
In this way, after fp16_optimizer_obj.backward, the fp32 parameters have fresh gradients,
|
326 |
+
and fp16_optimizer_obj.step may be called.
|
327 |
+
|
328 |
+
.. note::
|
329 |
+
Converting the loss to fp32 before applying the loss scale provides some
|
330 |
+
additional safety against overflow if the user has supplied an fp16 value.
|
331 |
+
However, for maximum overflow safety, the user should
|
332 |
+
compute the loss criterion (MSE, cross entropy, etc) in fp32 before supplying it to
|
333 |
+
fp16_optimizer_obj.backward.
|
334 |
+
|
335 |
+
.. note::
|
336 |
+
The gradients found in an fp16 model's leaves after a call to
|
337 |
+
fp16_optimizer_obj.backward should not be regarded as valid in general,
|
338 |
+
because it's possible
|
339 |
+
they have been scaled (and in the case of dynamic loss scaling,
|
340 |
+
the scale factor may silently change over time).
|
341 |
+
If the user wants to inspect gradients after a call to fp16_optimizer_obj.backward,
|
342 |
+
he/she should query the .grad attribute of FP16_Optimizer's stored fp32 parameters.
|
343 |
+
|
344 |
+
Args:
|
345 |
+
loss: The loss output by the user's model. loss may be either float or half (but see first Note above).
|
346 |
+
update_fp32_grads (bool, optional, default=True): Option to copy fp16 grads to fp32 grads on this call. By setting this to False, the user can delay this copy, which is useful to eliminate redundant fp16->fp32 grad copies if fp16_optimizer_obj.backward is being called on multiple losses in one iteration. If set to False, the user becomes responsible for calling fp16_optimizer_obj.update_fp32_grads before calling fp16_optimizer_obj.step.
|
347 |
+
|
348 |
+
Example::
|
349 |
+
|
350 |
+
# Ordinary operation:
|
351 |
+
optimizer.backward(loss)
|
352 |
+
|
353 |
+
# Naive operation with multiple losses (technically valid, but less efficient):
|
354 |
+
# fp32 grads will be correct after the second call, but
|
355 |
+
# the first call incurs an unnecessary fp16->fp32 grad copy.
|
356 |
+
optimizer.backward(loss1)
|
357 |
+
optimizer.backward(loss2)
|
358 |
+
|
359 |
+
# More efficient way to handle multiple losses:
|
360 |
+
# The fp16->fp32 grad copy is delayed until fp16 grads from all
|
361 |
+
# losses have been accumulated.
|
362 |
+
optimizer.backward(loss1, update_fp32_grads=False)
|
363 |
+
optimizer.backward(loss2, update_fp32_grads=False)
|
364 |
+
optimizer.update_fp32_grads()
|
365 |
+
"""
|
366 |
+
self.loss_scaler.backward(loss.float())
|
367 |
+
if update_fp32_grads:
|
368 |
+
self.update_fp32_grads()
|
369 |
+
|
370 |
+
def update_fp32_grads(self):
|
371 |
+
"""
|
372 |
+
Copy the .grad attribute from stored references to fp16 parameters to
|
373 |
+
the .grad attribute of the master fp32 parameters that are directly
|
374 |
+
updated by the optimizer. :attr:`update_fp32_grads` only needs to be called if
|
375 |
+
fp16_optimizer_obj.backward was called with update_fp32_grads=False.
|
376 |
+
"""
|
377 |
+
if self.dynamic_loss_scale:
|
378 |
+
self._check_overflow()
|
379 |
+
if self.overflow: return
|
380 |
+
self._copy_grads_fp16_to_fp32()
|
381 |
+
self._downscale_fp32()
|
382 |
+
|
383 |
+
@property
|
384 |
+
def loss_scale(self):
|
385 |
+
return self.loss_scaler.loss_scale
|
hifigan/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2020 Jungil Kong
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
hifigan/LJSpeech-1.1/training.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
hifigan/LJSpeech-1.1/validation.txt
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
LJ050-0269|The essential terms of such memoranda might well be embodied in an Executive order.|The essential terms of such memoranda might well be embodied in an Executive order.
|
2 |
+
LJ050-0270|This Commission can recommend no procedures for the future protection of our Presidents which will guarantee security.|This Commission can recommend no procedures for the future protection of our Presidents which will guarantee security.
|
3 |
+
LJ050-0271|The demands on the President in the execution of His responsibilities in today's world are so varied and complex|The demands on the President in the execution of His responsibilities in today's world are so varied and complex
|
4 |
+
LJ050-0272|and the traditions of the office in a democracy such as ours are so deep-seated as to preclude absolute security.|and the traditions of the office in a democracy such as ours are so deep-seated as to preclude absolute security.
|
5 |
+
LJ050-0273|The Commission has, however, from its examination of the facts of President Kennedy's assassination|The Commission has, however, from its examination of the facts of President Kennedy's assassination
|
6 |
+
LJ050-0274|made certain recommendations which it believes would, if adopted,|made certain recommendations which it believes would, if adopted,
|
7 |
+
LJ050-0275|materially improve upon the procedures in effect at the time of President Kennedy's assassination and result in a substantial lessening of the danger.|materially improve upon the procedures in effect at the time of President Kennedy's assassination and result in a substantial lessening of the danger.
|
8 |
+
LJ050-0276|As has been pointed out, the Commission has not resolved all the proposals which could be made. The Commission nevertheless is confident that,|As has been pointed out, the Commission has not resolved all the proposals which could be made. The Commission nevertheless is confident that,
|
9 |
+
LJ050-0277|with the active cooperation of the responsible agencies and with the understanding of the people of the United States in their demands upon their President,|with the active cooperation of the responsible agencies and with the understanding of the people of the United States in their demands upon their President,
|
10 |
+
LJ050-0278|the recommendations we have here suggested would greatly advance the security of the office without any impairment of our fundamental liberties.|the recommendations we have here suggested would greatly advance the security of the office without any impairment of our fundamental liberties.
|
11 |
+
LJ001-0028|but by printers in Strasburg, Basle, Paris, Lubeck, and other cities.|but by printers in Strasburg, Basle, Paris, Lubeck, and other cities.
|
12 |
+
LJ001-0068|The characteristic Dutch type, as represented by the excellent printer Gerard Leew, is very pronounced and uncompromising Gothic.|The characteristic Dutch type, as represented by the excellent printer Gerard Leew, is very pronounced and uncompromising Gothic.
|
13 |
+
LJ002-0149|The latter indeed hung like millstones round the neck of the unhappy insolvent wretches who found themselves in limbo.|The latter indeed hung like millstones round the neck of the unhappy insolvent wretches who found themselves in limbo.
|
14 |
+
LJ002-0157|and Susannah Evans, in October the same year, for 2 shillings, with costs of 6 shillings, 8 pence.|and Susannah Evans, in October the same year, for two shillings, with costs of six shillings, eight pence.
|
15 |
+
LJ002-0167|quotes a case which came within his own knowledge of a boy sent to prison for non-payment of one penny.|quotes a case which came within his own knowledge of a boy sent to prison for non-payment of one penny.
|
16 |
+
LJ003-0042|The completion of this very necessary building was, however, much delayed for want of funds,|The completion of this very necessary building was, however, much delayed for want of funds,
|
17 |
+
LJ003-0307|but as yet no suggestion was made to provide prison uniform.|but as yet no suggestion was made to provide prison uniform.
|
18 |
+
LJ004-0169|On the dirty bedstead lay a wretched being in the throes of severe illness.|On the dirty bedstead lay a wretched being in the throes of severe illness.
|
19 |
+
LJ004-0233|Under the new rule visitors were not allowed to pass into the interior of the prison, but were detained between the grating.|Under the new rule visitors were not allowed to pass into the interior of the prison, but were detained between the grating.
|
20 |
+
LJ005-0101|whence it deduced the practice and condition of every prison that replied.|whence it deduced the practice and condition of every prison that replied.
|
21 |
+
LJ005-0108|the prisoners, without firing, bedding, or sufficient food, spent their days "in surveying their grotesque prison,|the prisoners, without firing, bedding, or sufficient food, spent their days "in surveying their grotesque prison,
|
22 |
+
LJ005-0202|An examination of this report shows how even the most insignificant township had its jail.|An examination of this report shows how even the most insignificant township had its jail.
|
23 |
+
LJ005-0234|The visits of friends was once more unreservedly allowed, and these incomers freely brought in extra provisions and beer.|The visits of friends was once more unreservedly allowed, and these incomers freely brought in extra provisions and beer.
|
24 |
+
LJ005-0248|and stated that in his opinion Newgate, as the common jail of Middlesex, was wholly inadequate to the proper confinement of its prisoners.|and stated that in his opinion Newgate, as the common jail of Middlesex, was wholly inadequate to the proper confinement of its prisoners.
|
25 |
+
LJ006-0001|The Chronicles of Newgate, Volume 2. By Arthur Griffiths. Section 9: The first report of the inspector of prisons.|The Chronicles of Newgate, Volume two. By Arthur Griffiths. Section nine: The first report of the inspector of prisons.
|
26 |
+
LJ006-0018|One was Mr. William Crawford, the other the Rev. Whitworth Russell.|One was Mr. William Crawford, the other the Rev. Whitworth Russell.
|
27 |
+
LJ006-0034|They attended early and late; they mustered the prisoners, examined into their condition,|They attended early and late; they mustered the prisoners, examined into their condition,
|
28 |
+
LJ006-0078|A new prisoner's fate, as to location, rested really with a powerful fellow-prisoner.|A new prisoner's fate, as to location, rested really with a powerful fellow-prisoner.
|
29 |
+
LJ007-0217|They go on to say|They go on to say
|
30 |
+
LJ007-0243|It was not till the erection of the new prison at Holloway in 1850, and the entire internal reconstruction of Newgate according to new ideas,|It was not till the erection of the new prison at Holloway in eighteen fifty, and the entire internal reconstruction of Newgate according to new ideas,
|
31 |
+
LJ008-0087|The change from Tyburn to the Old Bailey had worked no improvement as regards the gathering together of the crowd or its demeanor.|The change from Tyburn to the Old Bailey had worked no improvement as regards the gathering together of the crowd or its demeanor.
|
32 |
+
LJ008-0131|the other he kept between his hands.|the other he kept between his hands.
|
33 |
+
LJ008-0140|Whenever the public attention had been specially called to a particular crime, either on account of its atrocity,|Whenever the public attention had been specially called to a particular crime, either on account of its atrocity,
|
34 |
+
LJ008-0158|The pressure soon became so frightful that many would have willingly escaped from the crowd; but their attempts only increased the general confusion.|The pressure soon became so frightful that many would have willingly escaped from the crowd; but their attempts only increased the general confusion.
|
35 |
+
LJ008-0174|One cart-load of spectators having broken down, some of its occupants fell off the vehicle, and were instantly trampled to death.|One cart-load of spectators having broken down, some of its occupants fell off the vehicle, and were instantly trampled to death.
|
36 |
+
LJ010-0047|while in 1850 Her Majesty was the victim of another outrage at the hands of one Pate.|while in eighteen fifty Her Majesty was the victim of another outrage at the hands of one Pate.
|
37 |
+
LJ010-0061|That some thirty or more needy men should hope to revolutionize England is a sufficient proof of the absurdity of their attempt.|That some thirty or more needy men should hope to revolutionize England is a sufficient proof of the absurdity of their attempt.
|
38 |
+
LJ010-0105|Thistlewood was discovered next morning in a mean house in White Street, Moorfields.|Thistlewood was discovered next morning in a mean house in White Street, Moorfields.
|
39 |
+
LJ010-0233|Here again probably it was partly the love of notoriety which was the incentive,|Here again probably it was partly the love of notoriety which was the incentive,
|
40 |
+
LJ010-0234|backed possibly with the hope that, as in a much more recent case,|backed possibly with the hope that, as in a much more recent case,
|
41 |
+
LJ010-0258|As the Queen was driving from Buckingham Palace to the Chapel Royal,|As the Queen was driving from Buckingham Palace to the Chapel Royal,
|
42 |
+
LJ010-0262|charged him with the offense.|charged him with the offense.
|
43 |
+
LJ010-0270|exactly tallied with that of the deformed person "wanted" for the assault on the Queen.|exactly tallied with that of the deformed person "wanted" for the assault on the Queen.
|
44 |
+
LJ010-0293|I have already remarked that as violence was more and more eliminated from crimes against the person,|I have already remarked that as violence was more and more eliminated from crimes against the person,
|
45 |
+
LJ011-0009|Nothing more was heard of the affair, although the lady declared that she had never instructed Fauntleroy to sell.|Nothing more was heard of the affair, although the lady declared that she had never instructed Fauntleroy to sell.
|
46 |
+
LJ011-0256|By this time the neighbors were aroused, and several people came to the scene of the affray.|By this time the neighbors were aroused, and several people came to the scene of the affray.
|
47 |
+
LJ012-0044|When his trade was busiest he set up a second establishment, at the head of which, although he was married,|When his trade was busiest he set up a second establishment, at the head of which, although he was married,
|
48 |
+
LJ012-0145|Solomons was now also admitted as a witness, and his evidence, with that of Moss, secured the transportation of the principal actors in the theft.|Solomons was now also admitted as a witness, and his evidence, with that of Moss, secured the transportation of the principal actors in the theft.
|
49 |
+
LJ013-0020|he acted in a manner which excited the suspicions of the crew.|he acted in a manner which excited the suspicions of the crew.
|
50 |
+
LJ013-0077|Barber and Fletcher were both transported for life, although Fletcher declared that Barber was innocent, and had no guilty knowledge of what was being done.|Barber and Fletcher were both transported for life, although Fletcher declared that Barber was innocent, and had no guilty knowledge of what was being done.
|
51 |
+
LJ013-0228|In the pocket of the coat Mr. Cope, the governor, found a neatly-folded cloth, and asked what it was for.|In the pocket of the coat Mr. Cope, the governor, found a neatly-folded cloth, and asked what it was for.
|
52 |
+
LJ014-0020|He was soon afterwards arrested on suspicion, and a search of his lodgings brought to light several garments saturated with blood;|He was soon afterwards arrested on suspicion, and a search of his lodgings brought to light several garments saturated with blood;
|
53 |
+
LJ014-0054|a maidservant, Sarah Thomas, murdered her mistress, an aged woman, by beating out her brains with a stone.|a maidservant, Sarah Thomas, murdered her mistress, an aged woman, by beating out her brains with a stone.
|
54 |
+
LJ014-0101|he found that it was soft and new, while elsewhere it was set and hard.|he found that it was soft and new, while elsewhere it was set and hard.
|
55 |
+
LJ014-0103|beneath them was a layer of fresh mortar, beneath that a lot of loose earth, amongst which a stocking was turned up, and presently a human toe.|beneath them was a layer of fresh mortar, beneath that a lot of loose earth, amongst which a stocking was turned up, and presently a human toe.
|
56 |
+
LJ014-0263|When other pleasures palled he took a theatre, and posed as a munificent patron of the dramatic art.|When other pleasures palled he took a theatre, and posed as a munificent patron of the dramatic art.
|
57 |
+
LJ014-0272|and 1850 to embezzle and apply to his own purposes some £71,000.|and eighteen fifty to embezzle and apply to his own purposes some seventy-one thousand pounds.
|
58 |
+
LJ014-0311|His extensive business had been carried on by fraud.|His extensive business had been carried on by fraud.
|
59 |
+
LJ015-0197|which at one time spread terror throughout London. Thieves preferred now to use ingenuity rather than brute force.|which at one time spread terror throughout London. Thieves preferred now to use ingenuity rather than brute force.
|
60 |
+
LJ016-0089|He was engaged in whitewashing and cleaning; the officer who had him in charge left him on the stairs leading to the gallery.|He was engaged in whitewashing and cleaning; the officer who had him in charge left him on the stairs leading to the gallery.
|
61 |
+
LJ016-0407|who generally attended the prison services.|who generally attended the prison services.
|
62 |
+
LJ016-0443|He was promptly rescued from his perilous condition, but not before his face and hands were badly scorched.|He was promptly rescued from his perilous condition, but not before his face and hands were badly scorched.
|
63 |
+
LJ017-0033|a medical practitioner, charged with doing to death persons who relied upon his professional skill.|a medical practitioner, charged with doing to death persons who relied upon his professional skill.
|
64 |
+
LJ017-0038|That the administration of justice should never be interfered with by local prejudice or local feeling|That the administration of justice should never be interfered with by local prejudice or local feeling
|
65 |
+
LJ018-0018|he wore gold-rimmed eye-glasses and a gold watch and chain.|he wore gold-rimmed eye-glasses and a gold watch and chain.
|
66 |
+
LJ018-0119|His offer was not, however, accepted.|His offer was not, however, accepted.
|
67 |
+
LJ018-0280|The commercial experience of these clever rogues was cosmopolitan.|The commercial experience of these clever rogues was cosmopolitan.
|
68 |
+
LJ019-0178|and abandoned because of the expense. As to the entire reconstruction of Newgate, nothing had been done as yet.|and abandoned because of the expense. As to the entire reconstruction of Newgate, nothing had been done as yet.
|
69 |
+
LJ019-0240|But no structural alterations were made from the date first quoted until the time of closing the prison in 1881.|But no structural alterations were made from the date first quoted until the time of closing the prison in eighteen eighty-one.
|
70 |
+
LJ021-0049|and the curtailment of rank stock speculation through the Securities Exchange Act.|and the curtailment of rank stock speculation through the Securities Exchange Act.
|
71 |
+
LJ021-0155|both directly on the public works themselves, and indirectly in the industries supplying the materials for these public works.|both directly on the public works themselves, and indirectly in the industries supplying the materials for these public works.
|
72 |
+
LJ022-0046|It is true that while business and industry are definitely better our relief rolls are still too large.|It is true that while business and industry are definitely better our relief rolls are still too large.
|
73 |
+
LJ022-0173|for the regulation of transportation by water, for the strengthening of our Merchant Marine and Air Transport,|for the regulation of transportation by water, for the strengthening of our Merchant Marine and Air Transport,
|
74 |
+
LJ024-0087|I have thus explained to you the reasons that lie behind our efforts to secure results by legislation within the Constitution.|I have thus explained to you the reasons that lie behind our efforts to secure results by legislation within the Constitution.
|
75 |
+
LJ024-0110|And the strategy of that last stand is to suggest the time-consuming process of amendment in order to kill off by delay|And the strategy of that last stand is to suggest the time-consuming process of amendment in order to kill off by delay
|
76 |
+
LJ024-0119|When before have you found them really at your side in your fights for progress?|When before have you found them really at your side in your fights for progress?
|
77 |
+
LJ025-0091|as it was current among contemporary chemists.|as it was current among contemporary chemists.
|
78 |
+
LJ026-0029|so in the case under discussion.|so in the case under discussion.
|
79 |
+
LJ026-0039|the earliest organisms were protists and that from them animals and plants were evolved along divergent lines of descent.|the earliest organisms were protists and that from them animals and plants were evolved along divergent lines of descent.
|
80 |
+
LJ026-0064|but unlike that of the animal, it is not chiefly an income of foods, but only of the raw materials of food.|but unlike that of the animal, it is not chiefly an income of foods, but only of the raw materials of food.
|
81 |
+
LJ026-0105|This is done by diastase, an enzyme of plant cells.|This is done by diastase, an enzyme of plant cells.
|
82 |
+
LJ026-0137|and be laid down as "reserve starch" in the cells of root or stem or elsewhere.|and be laid down as "reserve starch" in the cells of root or stem or elsewhere.
|
83 |
+
LJ027-0006|In all these lines the facts are drawn together by a strong thread of unity.|In all these lines the facts are drawn together by a strong thread of unity.
|
84 |
+
LJ028-0134|He also erected what is called a pensile paradise:|He also erected what is called a pensile paradise:
|
85 |
+
LJ028-0138|perhaps the tales that travelers told him were exaggerated as travelers' tales are likely to be,|perhaps the tales that travelers told him were exaggerated as travelers' tales are likely to be,
|
86 |
+
LJ028-0189|The fall of Babylon with its lofty walls was a most important event in the history of the ancient world.|The fall of Babylon with its lofty walls was a most important event in the history of the ancient world.
|
87 |
+
LJ028-0281|Till mules foal ye shall not take our city, he thought, as he reflected on this speech, that Babylon might now be taken,|Till mules foal ye shall not take our city, he thought, as he reflected on this speech, that Babylon might now be taken,
|
88 |
+
LJ029-0188|Stevenson was jeered, jostled, and spat upon by hostile demonstrators outside the Dallas Memorial Auditorium Theater.|Stevenson was jeered, jostled, and spat upon by hostile demonstrators outside the Dallas Memorial Auditorium Theater.
|
89 |
+
LJ030-0098|The remainder of the motorcade consisted of five cars for other dignitaries, including the mayor of Dallas and Texas Congressmen,|The remainder of the motorcade consisted of five cars for other dignitaries, including the mayor of Dallas and Texas Congressmen,
|
90 |
+
LJ031-0007|Chief of Police Curry and police motorcyclists at the head of the motorcade led the way to the hospital.|Chief of Police Curry and police motorcyclists at the head of the motorcade led the way to the hospital.
|
91 |
+
LJ031-0091|You have to determine which things, which are immediately life threatening and cope with them, before attempting to evaluate the full extent of the injuries.|You have to determine which things, which are immediately life threatening and cope with them, before attempting to evaluate the full extent of the injuries.
|
92 |
+
LJ031-0227|The doctors traced the course of the bullet through the body and, as information was received from Parkland Hospital,|The doctors traced the course of the bullet through the body and, as information was received from Parkland Hospital,
|
93 |
+
LJ032-0100|Marina Oswald|Marina Oswald
|
94 |
+
LJ032-0165|to the exclusion of all others because there are not enough microscopic characteristics present in fibers.|to the exclusion of all others because there are not enough microscopic characteristics present in fibers.
|
95 |
+
LJ032-0198|During the period from March 2, 1963, to April 24, 1963,|During the period from March two, nineteen sixty-three, to April twenty-four, nineteen sixty-three,
|
96 |
+
LJ033-0046|went out to the garage to paint some children's blocks, and worked in the garage for half an hour or so.|went out to the garage to paint some children's blocks, and worked in the garage for half an hour or so.
|
97 |
+
LJ033-0072|I then stepped off of it and the officer picked it up in the middle and it bent so.|I then stepped off of it and the officer picked it up in the middle and it bent so.
|
98 |
+
LJ033-0135|Location of Bag|Location of Bag
|
99 |
+
LJ034-0083|The significance of Givens' observation that Oswald was carrying his clipboard|The significance of Givens' observation that Oswald was carrying his clipboard
|
100 |
+
LJ034-0179|and, quote, seemed to be sitting a little forward, end quote,|and, quote, seemed to be sitting a little forward, end quote,
|
101 |
+
LJ035-0125|Victoria Adams, who worked on the fourth floor of the Depository Building,|Victoria Adams, who worked on the fourth floor of the Depository Building,
|
102 |
+
LJ035-0162|approximately 30 to 45 seconds after Oswald's lunchroom encounter with Baker and Truly.|approximately thirty to forty-five seconds after Oswald's lunchroom encounter with Baker and Truly.
|
103 |
+
LJ035-0189|Special Agent Forrest V. Sorrels of the Secret Service, who had been in the motorcade,|Special Agent Forrest V. Sorrels of the Secret Service, who had been in the motorcade,
|
104 |
+
LJ035-0208|Oswald's known actions in the building immediately after the assassination are consistent with his having been at the southeast corner window of the sixth floor|Oswald's known actions in the building immediately after the assassination are consistent with his having been at the southeast corner window of the sixth floor
|
105 |
+
LJ036-0216|Tippit got out and started to walk around the front of the car|Tippit got out and started to walk around the front of the car
|
106 |
+
LJ037-0093|William Arthur Smith was about a block east of 10th and Patton when he heard shots.|William Arthur Smith was about a block east of tenth and Patton when he heard shots.
|
107 |
+
LJ037-0157|taken from Oswald.|taken from Oswald.
|
108 |
+
LJ037-0178|or one used Remington-Peters cartridge case, which may have been in the revolver before the shooting,|or one used Remington-Peters cartridge case, which may have been in the revolver before the shooting,
|
109 |
+
LJ037-0219|Oswald's Jacket|Oswald's Jacket
|
110 |
+
LJ037-0222|When Oswald was arrested, he did not have a jacket.|When Oswald was arrested, he did not have a jacket.
|
111 |
+
LJ038-0017|Attracted by the sound of the sirens, Mrs. Postal stepped out of the box office and walked to the curb.|Attracted by the sound of the sirens, Mrs. Postal stepped out of the box office and walked to the curb.
|
112 |
+
LJ038-0052|testified regarding the arrest of Oswald, as did the various police officers who participated in the fight.|testified regarding the arrest of Oswald, as did the various police officers who participated in the fight.
|
113 |
+
LJ038-0077|Statements of Oswald during Detention.|Statements of Oswald during Detention.
|
114 |
+
LJ038-0161|and he asked me did I know which way he was coming, and I told him, yes, he probably come down Main and turn on Houston and then back again on Elm.|and he asked me did I know which way he was coming, and I told him, yes, he probably come down Main and turn on Houston and then back again on Elm.
|
115 |
+
LJ038-0212|which appeared to be the work of a man expecting to be killed, or imprisoned, or to disappear.|which appeared to be the work of a man expecting to be killed, or imprisoned, or to disappear.
|
116 |
+
LJ039-0103|Oswald, like all Marine recruits, received training on the rifle range at distances up to 500 yards,|Oswald, like all Marine recruits, received training on the rifle range at distances up to five hundred yards,
|
117 |
+
LJ039-0149|established that they had been previously loaded and ejected from the assassination rifle,|established that they had been previously loaded and ejected from the assassination rifle,
|
118 |
+
LJ040-0107|but apparently was not able to spend as much time with them as he would have liked, because of the age gaps of 5 and 7 years,|but apparently was not able to spend as much time with them as he would have liked, because of the age gaps of five and seven years,
|
119 |
+
LJ040-0119|When Pic returned home, Mrs. Oswald tried to play down the event but Mrs. Pic took a different view and asked the Oswalds to leave.|When Pic returned home, Mrs. Oswald tried to play down the event but Mrs. Pic took a different view and asked the Oswalds to leave.
|
120 |
+
LJ040-0161|Dr. Hartogs recommended that Oswald be placed on probation on condition that he seek help and guidance through a child guidance clinic.|Dr. Hartogs recommended that Oswald be placed on probation on condition that he seek help and guidance through a child guidance clinic.
|
121 |
+
LJ040-0169|She observed that since Lee's mother worked all day, he made his own meals and spent all his time alone|She observed that since Lee's mother worked all day, he made his own meals and spent all his time alone
|
122 |
+
LJ041-0098|All the Marine Corps did was to teach you to kill and after you got out of the Marines you might be good gangsters, end quote.|All the Marine Corps did was to teach you to kill and after you got out of the Marines you might be good gangsters, end quote.
|
123 |
+
LJ042-0017|and see for himself how a revolutionary society operates, a Marxist society.|and see for himself how a revolutionary society operates, a Marxist society.
|
124 |
+
LJ042-0070|Oswald was discovered in time to thwart his attempt at suicide.|Oswald was discovered in time to thwart his attempt at suicide.
|
125 |
+
LJ042-0161|Immediately after serving out his 3 years in the U.S. Marine Corps, he abandoned his American life to seek a new life in the USSR.|Immediately after serving out his three years in the U.S. Marine Corps, he abandoned his American life to seek a new life in the USSR.
|
126 |
+
LJ043-0147|He had left a note for his wife telling her what to do in case he were apprehended, as well as his notebook and the pictures of himself holding the rifle.|He had left a note for his wife telling her what to do in case he were apprehended, as well as his notebook and the pictures of himself holding the rifle.
|
127 |
+
LJ043-0178|as, in fact, one of them did appear after the assassination.|as, in fact, one of them did appear after the assassination.
|
128 |
+
LJ043-0183|Oswald did not lack the determination and other traits required|Oswald did not lack the determination and other traits required
|
129 |
+
LJ043-0185|Some idea of what he thought was sufficient reason for such an act may be found in the nature of the motive that he stated for his attack on General Walker.|Some idea of what he thought was sufficient reason for such an act may be found in the nature of the motive that he stated for his attack on General Walker.
|
130 |
+
LJ044-0057|extensive investigation was not able to connect Oswald with that address, although it did develop the fact|extensive investigation was not able to connect Oswald with that address, although it did develop the fact
|
131 |
+
LJ044-0109|It is good to know that movements in support of fair play for Cuba has developed in New Orleans as well as in other cities.|It is good to know that movements in support of fair play for Cuba has developed in New Orleans as well as in other cities.
|
132 |
+
LJ045-0081|Although she denied it in some of her testimony before the Commission,|Although she denied it in some of her testimony before the Commission,
|
133 |
+
LJ045-0147|She asked Oswald, quote,|She asked Oswald, quote,
|
134 |
+
LJ045-0204|he had never found anything to which he felt he could really belong.|he had never found anything to which he felt he could really belong.
|
135 |
+
LJ046-0193|and 12 to 15 of these cases as highly dangerous risks.|and twelve to fifteen of these cases as highly dangerous risks.
|
136 |
+
LJ046-0244|PRS should have investigated and been prepared to guard against it.|PRS should have investigated and been prepared to guard against it.
|
137 |
+
LJ047-0059|However, pursuant to a regular Bureau practice of interviewing certain immigrants from Iron Curtain countries,|However, pursuant to a regular Bureau practice of interviewing certain immigrants from Iron Curtain countries,
|
138 |
+
LJ047-0142|The Bureau had no earlier information suggesting that Oswald had left the United States.|The Bureau had no earlier information suggesting that Oswald had left the United States.
|
139 |
+
LJ048-0035|It was against this background and consistent with the criteria followed by the FBI prior to November 22|It was against this background and consistent with the criteria followed by the FBI prior to November twenty-two
|
140 |
+
LJ048-0063|The formal FBI instructions to its agents outlining the information to be referred to the Secret Service were too narrow at the time of the assassination.|The formal FBI instructions to its agents outlining the information to be referred to the Secret Service were too narrow at the time of the assassination.
|
141 |
+
LJ048-0104|There were far safer routes via freeways directly to the Trade Mart,|There were far safer routes via freeways directly to the Trade Mart,
|
142 |
+
LJ048-0187|In addition, Secret Service agents riding in the motorcade were trained to scan buildings as part of their general observation of the crowd of spectators.|In addition, Secret Service agents riding in the motorcade were trained to scan buildings as part of their general observation of the crowd of spectators.
|
143 |
+
LJ048-0271|will be cause for removal from the Service, end quote.|will be cause for removal from the Service, end quote.
|
144 |
+
LJ049-0031|The Presidential vehicle in use in Dallas, described in chapter 2,|The Presidential vehicle in use in Dallas, described in chapter two,
|
145 |
+
LJ049-0059|Agents are instructed that it is not their responsibility to investigate or evaluate a present danger,|Agents are instructed that it is not their responsibility to investigate or evaluate a present danger,
|
146 |
+
LJ049-0174|to notify the Secret Service of the substantial information about Lee Harvey Oswald which the FBI had accumulated|to notify the Secret Service of the substantial information about Lee Harvey Oswald which the FBI had accumulated
|
147 |
+
LJ050-0049|and from a specialist in psychiatric prognostication at Walter Reed Hospital.|and from a specialist in psychiatric prognostication at Walter Reed Hospital.
|
148 |
+
LJ050-0113|Such agreements should describe in detail the information which is sought, the manner in which it will be provided to the Secret Service,|Such agreements should describe in detail the information which is sought, the manner in which it will be provided to the Secret Service,
|
149 |
+
LJ050-0150|Its present manual filing system is obsolete;|Its present manual filing system is obsolete;
|
150 |
+
LJ050-0189|that written instructions might come into the hands of local newspapers, to the prejudice of the precautions described.|that written instructions might come into the hands of local newspapers, to the prejudice of the precautions described.
|
hifigan/README.md
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis
|
2 |
+
|
3 |
+
### Jungil Kong, Jaehyeon Kim, Jaekyoung Bae
|
4 |
+
|
5 |
+
In our [paper](https://arxiv.org/abs/2010.05646),
|
6 |
+
we proposed HiFi-GAN: a GAN-based model capable of generating high fidelity speech efficiently.<br/>
|
7 |
+
We provide our implementation and pretrained models as open source in this repository.
|
8 |
+
|
9 |
+
**Abstract :**
|
10 |
+
Several recent work on speech synthesis have employed generative adversarial networks (GANs) to produce raw waveforms.
|
11 |
+
Although such methods improve the sampling efficiency and memory usage,
|
12 |
+
their sample quality has not yet reached that of autoregressive and flow-based generative models.
|
13 |
+
In this work, we propose HiFi-GAN, which achieves both efficient and high-fidelity speech synthesis.
|
14 |
+
As speech audio consists of sinusoidal signals with various periods,
|
15 |
+
we demonstrate that modeling periodic patterns of an audio is crucial for enhancing sample quality.
|
16 |
+
A subjective human evaluation (mean opinion score, MOS) of a single speaker dataset indicates that our proposed method
|
17 |
+
demonstrates similarity to human quality while generating 22.05 kHz high-fidelity audio 167.9 times faster than
|
18 |
+
real-time on a single V100 GPU. We further show the generality of HiFi-GAN to the mel-spectrogram inversion of unseen
|
19 |
+
speakers and end-to-end speech synthesis. Finally, a small footprint version of HiFi-GAN generates samples 13.4 times
|
20 |
+
faster than real-time on CPU with comparable quality to an autoregressive counterpart.
|
21 |
+
|
22 |
+
Visit our [demo website](https://jik876.github.io/hifi-gan-demo/) for audio samples.
|
23 |
+
|
24 |
+
|
25 |
+
## Pre-requisites
|
26 |
+
1. Python >= 3.6
|
27 |
+
2. Clone this repository.
|
28 |
+
3. Install the Python requirements. Please refer to [requirements.txt](requirements.txt)
|
29 |
+
4. Download and extract the [LJ Speech dataset](https://keithito.com/LJ-Speech-Dataset/).
|
30 |
+
And move all wav files to `LJSpeech-1.1/wavs`
|
31 |
+
|
32 |
+
|
33 |
+
## Training
|
34 |
+
```
|
35 |
+
python train.py --config config_v1.json
|
36 |
+
```
|
37 |
+
To train V2 or V3 Generator, replace `config_v1.json` with `config_v2.json` or `config_v3.json`.<br>
|
38 |
+
Checkpoints and a copy of the configuration file are saved in the `cp_hifigan` directory by default.<br>
|
39 |
+
You can change the path by adding `--checkpoint_path` option.
|
40 |
+
|
41 |
+
Validation loss during training with V1 generator.<br>
|
42 |
+
![validation loss](./validation_loss.png)
|
43 |
+
|
44 |
+
## Pretrained Model
|
45 |
+
You can also use pretrained models we provide.<br/>
|
46 |
+
[Download pretrained models](https://drive.google.com/drive/folders/1-eEYTB5Av9jNql0WGBlRoi-WH2J7bp5Y?usp=sharing)<br/>
|
47 |
+
Details of each folder are as follows:
|
48 |
+
|
49 |
+
|Folder Name|Generator|Dataset|Fine-Tuned|
|
50 |
+
|------|---|---|---|
|
51 |
+
|LJ_V1|V1|LJSpeech|No|
|
52 |
+
|LJ_V2|V2|LJSpeech|No|
|
53 |
+
|LJ_V3|V3|LJSpeech|No|
|
54 |
+
|LJ_FT_T2_V1|V1|LJSpeech|Yes ([Tacotron2](https://github.com/NVIDIA/tacotron2))|
|
55 |
+
|LJ_FT_T2_V2|V2|LJSpeech|Yes ([Tacotron2](https://github.com/NVIDIA/tacotron2))|
|
56 |
+
|LJ_FT_T2_V3|V3|LJSpeech|Yes ([Tacotron2](https://github.com/NVIDIA/tacotron2))|
|
57 |
+
|VCTK_V1|V1|VCTK|No|
|
58 |
+
|VCTK_V2|V2|VCTK|No|
|
59 |
+
|VCTK_V3|V3|VCTK|No|
|
60 |
+
|UNIVERSAL_V1|V1|Universal|No|
|
61 |
+
|
62 |
+
We provide the universal model with discriminator weights that can be used as a base for transfer learning to other datasets.
|
63 |
+
|
64 |
+
## Fine-Tuning
|
65 |
+
1. Generate mel-spectrograms in numpy format using [Tacotron2](https://github.com/NVIDIA/tacotron2) with teacher-forcing.<br/>
|
66 |
+
The file name of the generated mel-spectrogram should match the audio file and the extension should be `.npy`.<br/>
|
67 |
+
Example:
|
68 |
+
```
|
69 |
+
Audio File : LJ001-0001.wav
|
70 |
+
Mel-Spectrogram File : LJ001-0001.npy
|
71 |
+
```
|
72 |
+
2. Create `ft_dataset` folder and copy the generated mel-spectrogram files into it.<br/>
|
73 |
+
3. Run the following command.
|
74 |
+
```
|
75 |
+
python train.py --fine_tuning True --config config_v1.json
|
76 |
+
```
|
77 |
+
For other command line options, please refer to the training section.
|
78 |
+
|
79 |
+
|
80 |
+
## Inference from wav file
|
81 |
+
1. Make `test_files` directory and copy wav files into the directory.
|
82 |
+
2. Run the following command.
|
83 |
+
```
|
84 |
+
python inference.py --checkpoint_file [generator checkpoint file path]
|
85 |
+
```
|
86 |
+
Generated wav files are saved in `generated_files` by default.<br>
|
87 |
+
You can change the path by adding `--output_dir` option.
|
88 |
+
|
89 |
+
|
90 |
+
## Inference for end-to-end speech synthesis
|
91 |
+
1. Make `test_mel_files` directory and copy generated mel-spectrogram files into the directory.<br>
|
92 |
+
You can generate mel-spectrograms using [Tacotron2](https://github.com/NVIDIA/tacotron2),
|
93 |
+
[Glow-TTS](https://github.com/jaywalnut310/glow-tts) and so forth.
|
94 |
+
2. Run the following command.
|
95 |
+
```
|
96 |
+
python inference_e2e.py --checkpoint_file [generator checkpoint file path]
|
97 |
+
```
|
98 |
+
Generated wav files are saved in `generated_files_from_mel` by default.<br>
|
99 |
+
You can change the path by adding `--output_dir` option.
|
100 |
+
|
101 |
+
|
102 |
+
## Acknowledgements
|
103 |
+
We referred to [WaveGlow](https://github.com/NVIDIA/waveglow), [MelGAN](https://github.com/descriptinc/melgan-neurips)
|
104 |
+
and [Tacotron2](https://github.com/NVIDIA/tacotron2) to implement this.
|
105 |
+
|
hifigan/__pycache__/env.cpython-310.pyc
ADDED
Binary file (840 Bytes). View file
|
|
hifigan/__pycache__/models.cpython-310.pyc
ADDED
Binary file (8.7 kB). View file
|
|
hifigan/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.05 kB). View file
|
|
hifigan/config_v1.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"resblock": "1",
|
3 |
+
"num_gpus": 0,
|
4 |
+
"batch_size": 16,
|
5 |
+
"learning_rate": 0.0002,
|
6 |
+
"adam_b1": 0.8,
|
7 |
+
"adam_b2": 0.99,
|
8 |
+
"lr_decay": 0.999,
|
9 |
+
"seed": 1234,
|
10 |
+
|
11 |
+
"upsample_rates": [8,8,2,2],
|
12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
13 |
+
"upsample_initial_channel": 512,
|
14 |
+
"resblock_kernel_sizes": [3,7,11],
|
15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
16 |
+
|
17 |
+
"segment_size": 8192,
|
18 |
+
"num_mels": 80,
|
19 |
+
"num_freq": 1025,
|
20 |
+
"n_fft": 1024,
|
21 |
+
"hop_size": 256,
|
22 |
+
"win_size": 1024,
|
23 |
+
|
24 |
+
"sampling_rate": 22050,
|
25 |
+
|
26 |
+
"fmin": 0,
|
27 |
+
"fmax": 8000,
|
28 |
+
"fmax_for_loss": null,
|
29 |
+
|
30 |
+
"num_workers": 4,
|
31 |
+
|
32 |
+
"dist_config": {
|
33 |
+
"dist_backend": "nccl",
|
34 |
+
"dist_url": "tcp://localhost:54321",
|
35 |
+
"world_size": 1
|
36 |
+
}
|
37 |
+
}
|
hifigan/env.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
|
4 |
+
|
5 |
+
class AttrDict(dict):
    """Dictionary whose entries can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Point the instance namespace at the mapping itself so that
        # ``d.key`` and ``d["key"]`` share the same storage.
        self.__dict__ = self
|
9 |
+
|
10 |
+
|
11 |
+
def build_env(config, config_name, path):
    """Copy the configuration file into the run directory.

    Args:
        config: Path of the configuration file to copy.
        config_name: File name the copy gets inside ``path``.
        path: Destination directory; created if it does not exist.

    The copy is skipped when ``config`` already is the destination file.
    """
    t_path = os.path.join(path, config_name)
    # Compare resolved paths rather than raw strings: two spellings of the
    # same file (e.g. "cp/./config.json" vs "cp/config.json") would otherwise
    # reach shutil.copyfile and raise SameFileError.
    if os.path.realpath(config) != os.path.realpath(t_path):
        os.makedirs(path, exist_ok=True)
        shutil.copyfile(config, t_path)
|
hifigan/inference.py
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import absolute_import, division, print_function, unicode_literals
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import os
|
5 |
+
import argparse
|
6 |
+
import json
|
7 |
+
import torch
|
8 |
+
from scipy.io.wavfile import write
|
9 |
+
from env import AttrDict
|
10 |
+
from meldataset import mel_spectrogram, MAX_WAV_VALUE, load_wav
|
11 |
+
from models import Generator
|
12 |
+
|
13 |
+
h = None
|
14 |
+
device = None
|
15 |
+
|
16 |
+
|
17 |
+
def load_checkpoint(filepath, device):
    """Load a checkpoint file with ``torch.load``.

    Args:
        filepath: Path of the checkpoint file.
        device: Passed to ``torch.load`` as ``map_location``.

    Returns:
        The object stored in the checkpoint file.

    Raises:
        FileNotFoundError: If ``filepath`` is not an existing file.
    """
    # Explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would let a bad path fall through.
    if not os.path.isfile(filepath):
        raise FileNotFoundError("Checkpoint file not found: '{}'".format(filepath))
    print("Loading '{}'".format(filepath))
    # NOTE: torch.load unpickles the file; only load checkpoints from
    # sources you trust.
    checkpoint_dict = torch.load(filepath, map_location=device)
    print("Complete.")
    return checkpoint_dict
|
23 |
+
|
24 |
+
|
25 |
+
def get_mel(x):
    """Return ``mel_spectrogram`` of ``x`` using the module-level config ``h``."""
    cfg = h
    return mel_spectrogram(x, cfg.n_fft, cfg.num_mels, cfg.sampling_rate,
                           cfg.hop_size, cfg.win_size, cfg.fmin, cfg.fmax)
|
27 |
+
|
28 |
+
|
29 |
+
def scan_checkpoint(cp_dir, prefix):
    """Return the lexicographically last path in ``cp_dir`` starting with ``prefix``.

    Args:
        cp_dir: Directory to search.
        prefix: Leading part of the file name; ``'*'`` is appended to it.

    Returns:
        The greatest matching path by string order, or ``''`` if none match.
    """
    # Escape the directory so glob metacharacters in its name (e.g. "cp[v1]")
    # are matched literally instead of being treated as a pattern.
    pattern = os.path.join(glob.escape(cp_dir), prefix + '*')
    cp_list = glob.glob(pattern)
    if not cp_list:
        return ''
    # max() picks the same element as sorted(...)[-1] without a full sort.
    return max(cp_list)
|
35 |
+
|
36 |
+
|
37 |
+
def inference(a):
    """Re-synthesise every file in ``a.input_wavs_dir`` with the generator.

    For each input file the waveform is loaded, converted to a mel-spectrogram
    with ``get_mel``, passed through the generator, and written to
    ``a.output_dir`` as ``<name>_generated.wav``.

    Args:
        a: Parsed arguments providing ``checkpoint_file``, ``input_wavs_dir``
            and ``output_dir``.
    """
    generator = Generator(h).to(device)

    state_dict_g = load_checkpoint(a.checkpoint_file, device)
    generator.load_state_dict(state_dict_g['generator'])

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    filelist = sorted(os.listdir(a.input_wavs_dir))

    os.makedirs(a.output_dir, exist_ok=True)

    generator.eval()
    generator.remove_weight_norm()
    with torch.no_grad():
        for filename in filelist:
            # The file's own sampling rate is not used; output is written at
            # h.sampling_rate.
            wav, _ = load_wav(os.path.join(a.input_wavs_dir, filename))
            wav = wav / MAX_WAV_VALUE
            wav = torch.FloatTensor(wav).to(device)
            x = get_mel(wav.unsqueeze(0))
            y_g_hat = generator(x)
            audio = y_g_hat.squeeze()
            audio = audio * MAX_WAV_VALUE
            # A sample of exactly 1.0 scales to 32768, which does not fit in
            # int16; clamp to the representable range before casting.
            audio = torch.clamp(audio, -MAX_WAV_VALUE, MAX_WAV_VALUE - 1)
            audio = audio.cpu().numpy().astype('int16')

            output_file = os.path.join(a.output_dir, os.path.splitext(filename)[0] + '_generated.wav')
            write(output_file, h.sampling_rate, audio)
            print(output_file)
|
63 |
+
|
64 |
+
|
65 |
+
def main():
    """Parse command-line arguments, load the config, pick a device and run inference.

    The configuration is read from ``config.json`` in the same directory as
    the checkpoint file. The module-level ``h`` and ``device`` are set here.
    """
    global h, device

    print('Initializing Inference Process..')

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_wavs_dir', default='test_files')
    parser.add_argument('--output_dir', default='generated_files')
    parser.add_argument('--checkpoint_file', required=True)
    a = parser.parse_args()

    checkpoint_dir = os.path.split(a.checkpoint_file)[0]
    with open(os.path.join(checkpoint_dir, 'config.json')) as f:
        raw_config = f.read()

    h = AttrDict(json.loads(raw_config))

    torch.manual_seed(h.seed)
    if not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        torch.cuda.manual_seed(h.seed)
        device = torch.device('cuda')

    inference(a)
|
91 |
+
|
92 |
+
|
93 |
+
if __name__ == '__main__':
|
94 |
+
main()
|
95 |
+
|
hifigan/meldataset.py
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
import os
|
3 |
+
import random
|
4 |
+
import torch
|
5 |
+
import torch.utils.data
|
6 |
+
import numpy as np
|
7 |
+
from librosa.util import normalize
|
8 |
+
from scipy.io.wavfile import read
|
9 |
+
from librosa.filters import mel as librosa_mel_fn
|
10 |
+
|
11 |
+
MAX_WAV_VALUE = 32768.0
|
12 |
+
|
13 |
+
|
14 |
+
def load_wav(full_path):
    """Read a WAV file and return ``(samples, sampling_rate)``."""
    rate, samples = read(full_path)
    return samples, rate
|
17 |
+
|
18 |
+
|
19 |
+
def dynamic_range_compression(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` element-wise (NumPy)."""
    floored = np.clip(x, a_min=clip_val, a_max=None)
    return np.log(floored * C)
|
21 |
+
|
22 |
+
|
23 |
+
def dynamic_range_decompression(x, C=1):
    """Return ``exp(x) / C`` element-wise (NumPy)."""
    expanded = np.exp(x)
    return expanded / C
|
25 |
+
|
26 |
+
|
27 |
+
def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
    """Return ``log(max(x, clip_val) * C)`` element-wise (PyTorch)."""
    floored = torch.clamp(x, min=clip_val)
    return torch.log(floored * C)
|
29 |
+
|
30 |
+
|
31 |
+
def dynamic_range_decompression_torch(x, C=1):
    """Return ``exp(x) / C`` element-wise (PyTorch)."""
    expanded = torch.exp(x)
    return expanded / C
|
33 |
+
|
34 |
+
|
35 |
+
def spectral_normalize_torch(magnitudes):
    """Apply ``dynamic_range_compression_torch`` with its default arguments."""
    return dynamic_range_compression_torch(magnitudes)
|
38 |
+
|
39 |
+
|
40 |
+
def spectral_de_normalize_torch(magnitudes):
    """Apply ``dynamic_range_decompression_torch`` with its default arguments."""
    return dynamic_range_decompression_torch(magnitudes)
|
43 |
+
|
44 |
+
|
45 |
+
mel_basis = {}
|
46 |
+
hann_window = {}
|
47 |
+
|
48 |
+
|
49 |
+
def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
    """Compute a log-compressed mel-spectrogram of ``y``.

    Args:
        y: Waveform tensor; a message is printed if values fall outside
            [-1, 1]. Assumed to be (batch, samples) - TODO confirm with callers.
        n_fft: FFT size.
        num_mels: Number of mel bands.
        sampling_rate: Sampling rate used to build the mel filterbank.
        hop_size: STFT hop length.
        win_size: STFT window length.
        fmin: Lowest frequency of the mel filterbank.
        fmax: Highest frequency of the mel filterbank.
        center: Forwarded to ``torch.stft``.

    Returns:
        The mel-projected STFT magnitude after ``spectral_normalize_torch``.
    """
    if torch.min(y) < -1.:
        print('min value is ', torch.min(y))
    if torch.max(y) > 1.:
        print('max value is ', torch.max(y))

    global mel_basis, hann_window
    device_key = str(y.device)
    # The cache key must be the same object that is tested for membership;
    # testing the bare ``fmax`` against composite keys never hits, so the
    # filterbank was rebuilt on every call. Every parameter that shapes the
    # filterbank is part of the key, so different configs cannot collide.
    basis_key = (sampling_rate, n_fft, num_mels, fmin, fmax, device_key)
    if basis_key not in mel_basis:
        # Keyword arguments work with both older and current librosa releases
        # (newer ones reject positional arguments here).
        mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
        mel_basis[basis_key] = torch.from_numpy(mel).float().to(y.device)
    window_key = (win_size, device_key)
    if window_key not in hann_window:
        hann_window[window_key] = torch.hann_window(win_size).to(y.device)

    # Reflect-pad manually so the number of frames follows hop_size while
    # the STFT itself runs with the caller's ``center`` setting.
    pad = int((n_fft - hop_size) / 2)
    y = torch.nn.functional.pad(y.unsqueeze(1), (pad, pad), mode='reflect')
    y = y.squeeze(1)

    # Recent PyTorch requires return_complex to be given explicitly; request
    # the complex result and view it as (..., 2) real/imag pairs so the
    # magnitude computation below stays numerically the same.
    spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[window_key],
                      center=center, pad_mode='reflect', normalized=False, onesided=True,
                      return_complex=True)
    spec = torch.view_as_real(spec)

    # Magnitude, with a small epsilon to keep sqrt well-behaved at zero.
    spec = torch.sqrt(spec.pow(2).sum(-1) + (1e-9))

    spec = torch.matmul(mel_basis[basis_key], spec)
    spec = spectral_normalize_torch(spec)

    return spec
|
73 |
+
|
74 |
+
|
75 |
+
def get_dataset_filelist(a):
    """Build the training and validation WAV path lists from the filelist files.

    Each non-empty line contributes the text before its first ``|``, joined
    to ``a.input_wavs_dir`` with a ``.wav`` suffix.

    Args:
        a: Object providing ``input_training_file``, ``input_validation_file``
            and ``input_wavs_dir``.

    Returns:
        ``(training_files, validation_files)`` as two lists of paths.
    """
    def _wav_paths(list_file):
        with open(list_file, 'r', encoding='utf-8') as fi:
            rows = fi.read().split('\n')
        paths = []
        for row in rows:
            if len(row) > 0:
                paths.append(os.path.join(a.input_wavs_dir, row.split('|')[0] + '.wav'))
        return paths

    training_files = _wav_paths(a.input_training_file)
    validation_files = _wav_paths(a.input_validation_file)
    return training_files, validation_files
|
84 |
+
|
85 |
+
|
86 |
+
class MelDataset(torch.utils.data.Dataset):
    """Dataset yielding ``(mel, audio, filename, mel_loss)`` tuples.

    In normal mode the mel-spectrogram is computed from the (optionally
    cropped or padded) audio. In fine-tuning mode it is loaded from a ``.npy``
    file in ``base_mels_path`` named after the audio file.
    """

    def __init__(self, training_files, segment_size, n_fft, num_mels,
                 hop_size, win_size, sampling_rate, fmin, fmax, split=True, shuffle=True, n_cache_reuse=1,
                 device=None, fmax_loss=None, fine_tuning=False, base_mels_path=None):
        """Store the configuration and optionally shuffle the file list.

        Note that this seeds the global ``random`` module with 1234 and, when
        ``shuffle`` is true, shuffles ``training_files`` in place.
        """
        self.audio_files = training_files
        random.seed(1234)
        if shuffle:
            random.shuffle(self.audio_files)
        self.segment_size = segment_size
        self.sampling_rate = sampling_rate
        self.split = split
        self.n_fft = n_fft
        self.num_mels = num_mels
        self.hop_size = hop_size
        self.win_size = win_size
        self.fmin = fmin
        self.fmax = fmax
        self.fmax_loss = fmax_loss
        self.cached_wav = None
        self.n_cache_reuse = n_cache_reuse
        self._cache_ref_count = 0
        self.device = device
        self.fine_tuning = fine_tuning
        self.base_mels_path = base_mels_path

    def __getitem__(self, index):
        """Return ``(mel, audio, filename, mel_loss)`` for item ``index``.

        Raises:
            ValueError: If the file's sampling rate differs from the
                configured one.
        """
        filename = self.audio_files[index]
        if self._cache_ref_count == 0:
            audio, sampling_rate = load_wav(filename)
            audio = audio / MAX_WAV_VALUE
            if not self.fine_tuning:
                audio = normalize(audio) * 0.95
            self.cached_wav = audio
            if sampling_rate != self.sampling_rate:
                raise ValueError("{} SR doesn't match target {} SR".format(
                    sampling_rate, self.sampling_rate))
            self._cache_ref_count = self.n_cache_reuse
        else:
            # NOTE(review): the cached waveform is reused regardless of
            # ``index``, so with n_cache_reuse > 0 the audio may belong to a
            # previously requested file - confirm this is intended.
            audio = self.cached_wav
            self._cache_ref_count -= 1

        audio = torch.FloatTensor(audio)
        audio = audio.unsqueeze(0)

        if not self.fine_tuning:
            if self.split:
                if audio.size(1) >= self.segment_size:
                    # Random crop of exactly segment_size samples.
                    max_audio_start = audio.size(1) - self.segment_size
                    audio_start = random.randint(0, max_audio_start)
                    audio = audio[:, audio_start:audio_start+self.segment_size]
                else:
                    # Zero-pad short clips up to segment_size.
                    audio = torch.nn.functional.pad(audio, (0, self.segment_size - audio.size(1)), 'constant')

            mel = mel_spectrogram(audio, self.n_fft, self.num_mels,
                                  self.sampling_rate, self.hop_size, self.win_size, self.fmin, self.fmax,
                                  center=False)
        else:
            mel = np.load(
                os.path.join(self.base_mels_path, os.path.splitext(os.path.split(filename)[-1])[0] + '.npy'))
            mel = torch.from_numpy(mel)

            if len(mel.shape) < 3:
                mel = mel.unsqueeze(0)

            if self.split:
                frames_per_seg = math.ceil(self.segment_size / self.hop_size)

                if audio.size(1) >= self.segment_size:
                    # Clamp the upper bound at 0: when the mel has no spare
                    # frames beyond one segment the bound would go negative
                    # and random.randint would raise ValueError.
                    max_mel_start = max(0, mel.size(2) - frames_per_seg - 1)
                    mel_start = random.randint(0, max_mel_start)
                    mel = mel[:, :, mel_start:mel_start + frames_per_seg]
                    # Crop the audio to the samples covered by the mel frames.
                    audio = audio[:, mel_start * self.hop_size:(mel_start + frames_per_seg) * self.hop_size]
                else:
                    mel = torch.nn.functional.pad(mel, (0, frames_per_seg - mel.size(2)), 'constant')
                    audio = torch.nn.functional.pad(audio, (0, self.segment_size - audio.size(1)), 'constant')

        # Second spectrogram, computed with fmax_loss as the upper frequency.
        mel_loss = mel_spectrogram(audio, self.n_fft, self.num_mels,
                                   self.sampling_rate, self.hop_size, self.win_size, self.fmin, self.fmax_loss,
                                   center=False)

        return (mel.squeeze(), audio.squeeze(0), filename, mel_loss.squeeze())

    def __len__(self):
        """Return the number of audio files."""
        return len(self.audio_files)
|
hifigan/models.py
ADDED
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
import torch.nn.functional as F
|
3 |
+
import torch.nn as nn
|
4 |
+
from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
|
5 |
+
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
|
6 |
+
from hifigan.utils import init_weights, get_padding
|
7 |
+
|
8 |
+
LRELU_SLOPE = 0.1
|
9 |
+
|
10 |
+
|
11 |
+
class ResBlock1(torch.nn.Module):
    """Residual block of three (dilated conv, plain conv) pairs.

    Each pair applies leaky-ReLU, a dilated weight-normalised Conv1d,
    leaky-ReLU, a dilation-1 weight-normalised Conv1d, then adds the input.
    """

    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
        super().__init__()
        self.h = h

        def _conv(d):
            # Channel-preserving, stride-1 convolution with dilation ``d``.
            return weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=d,
                                      padding=get_padding(kernel_size, d)))

        # First convolution of each pair: one per configured dilation.
        self.convs1 = nn.ModuleList([_conv(dilation[idx]) for idx in range(3)])
        self.convs1.apply(init_weights)

        # Second convolution of each pair: always dilation 1.
        self.convs2 = nn.ModuleList([_conv(1) for _ in range(3)])
        self.convs2.apply(init_weights)

    def forward(self, x):
        for dilated, plain in zip(self.convs1, self.convs2):
            branch = dilated(F.leaky_relu(x, LRELU_SLOPE))
            branch = plain(F.leaky_relu(branch, LRELU_SLOPE))
            x = branch + x
        return x

    def remove_weight_norm(self):
        """Remove weight normalisation from every convolution in the block."""
        for layer in list(self.convs1) + list(self.convs2):
            remove_weight_norm(layer)
|
49 |
+
|
50 |
+
|
51 |
+
class ResBlock2(torch.nn.Module):
    """Residual block of two dilated, weight-normalised Conv1d layers.

    Each layer applies leaky-ReLU, the convolution, then adds the input.
    """

    def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
        super().__init__()
        self.h = h
        layers = []
        for idx in range(2):
            d = dilation[idx]
            layers.append(weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=d,
                                             padding=get_padding(kernel_size, d))))
        self.convs = nn.ModuleList(layers)
        self.convs.apply(init_weights)

    def forward(self, x):
        for conv in self.convs:
            branch = conv(F.leaky_relu(x, LRELU_SLOPE))
            x = branch + x
        return x

    def remove_weight_norm(self):
        """Remove weight normalisation from every convolution in the block."""
        for layer in self.convs:
            remove_weight_norm(layer)
|
73 |
+
|
74 |
+
|
75 |
+
class Generator(torch.nn.Module):
    """Generator: input conv, upsampling stages with residual blocks, output conv.

    Each stage applies a transposed convolution and then averages the
    outputs of ``len(h.resblock_kernel_sizes)`` residual blocks. The final
    output passes through ``tanh``.
    """

    def __init__(self, h):
        """Build the network from config ``h``.

        Reads ``resblock``, ``resblock_kernel_sizes``,
        ``resblock_dilation_sizes``, ``upsample_rates``,
        ``upsample_kernel_sizes`` and ``upsample_initial_channel``. The input
        channel count comes from ``h.num_mels`` when present, else 80.
        """
        super(Generator, self).__init__()
        self.h = h
        self.num_kernels = len(h.resblock_kernel_sizes)
        self.num_upsamples = len(h.upsample_rates)
        # Take the input channel count from the config instead of a literal
        # 80; configs without ``num_mels`` keep the previous value.
        in_channels = getattr(h, 'num_mels', 80)
        self.conv_pre = weight_norm(Conv1d(in_channels, h.upsample_initial_channel, 7, 1, padding=3))
        resblock = ResBlock1 if h.resblock == '1' else ResBlock2

        # Each upsampling stage halves the channel count.
        self.ups = nn.ModuleList()
        for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
            self.ups.append(weight_norm(
                ConvTranspose1d(h.upsample_initial_channel//(2**i), h.upsample_initial_channel//(2**(i+1)),
                                k, u, padding=(k-u)//2)))

        # Flat list: stage i owns resblocks[i*num_kernels:(i+1)*num_kernels].
        self.resblocks = nn.ModuleList()
        for i in range(len(self.ups)):
            ch = h.upsample_initial_channel//(2**(i+1))
            for k, d in zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes):
                self.resblocks.append(resblock(h, ch, k, d))

        # ``ch`` is the channel count of the last upsampling stage.
        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
        self.ups.apply(init_weights)
        self.conv_post.apply(init_weights)

    def forward(self, x):
        """Map the input feature tensor to a single-channel output in [-1, 1]."""
        x = self.conv_pre(x)
        for i in range(self.num_upsamples):
            x = F.leaky_relu(x, LRELU_SLOPE)
            x = self.ups[i](x)
            xs = None
            for j in range(self.num_kernels):
                if xs is None:
                    xs = self.resblocks[i*self.num_kernels+j](x)
                else:
                    xs += self.resblocks[i*self.num_kernels+j](x)
            # Average the residual-block outputs of this stage.
            x = xs / self.num_kernels
        # NOTE(review): this activation uses leaky_relu's default slope, not
        # LRELU_SLOPE. Left unchanged because altering it would change the
        # output of existing trained weights - confirm it is intentional.
        x = F.leaky_relu(x)
        x = self.conv_post(x)
        x = torch.tanh(x)

        return x

    def remove_weight_norm(self):
        """Remove weight normalisation from every layer of the generator."""
        print('Removing weight norm...')
        for l in self.ups:
            remove_weight_norm(l)
        for l in self.resblocks:
            l.remove_weight_norm()
        remove_weight_norm(self.conv_pre)
        remove_weight_norm(self.conv_post)
|
126 |
+
|
127 |
+
|
128 |
+
class DiscriminatorP(torch.nn.Module):
    """Discriminator that views the input folded by a fixed period.

    The (batch, channels, time) input is reshaped to
    (batch, channels, time // period, period) and processed by 2-D
    convolutions whose kernels span only the first of those two axes.
    """

    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
        """Build the convolution stack.

        Args:
            period: Width of the folded axis.
            kernel_size: Kernel extent along the folded time axis.
            stride: Stride along the folded time axis for the first four layers.
            use_spectral_norm: Use ``spectral_norm`` when truthy, otherwise
                ``weight_norm``.
        """
        super(DiscriminatorP, self).__init__()
        self.period = period
        norm_f = spectral_norm if use_spectral_norm else weight_norm
        # Derive the padding from the actual kernel size; it used to be
        # computed for a literal 5 regardless of ``kernel_size``. The result
        # is identical for the default kernel_size=5.
        pad = (get_padding(kernel_size, 1), 0)
        self.convs = nn.ModuleList([
            norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=pad)),
            norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=pad)),
            norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=pad)),
            norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=pad)),
            # NOTE(review): the literal padding of 2 here presumably also
            # assumes kernel_size == 5 - confirm before using other sizes.
            norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))),
        ])
        self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))

    def forward(self, x):
        """Return ``(flattened_output, feature_maps)`` for input ``x``."""
        fmap = []

        # 1d to 2d
        b, c, t = x.shape
        if t % self.period != 0:  # pad first
            # Reflect-pad the end so the length is a multiple of the period.
            n_pad = self.period - (t % self.period)
            x = F.pad(x, (0, n_pad), "reflect")
            t = t + n_pad
        x = x.view(b, c, t // self.period, self.period)

        for l in self.convs:
            x = l(x)
            x = F.leaky_relu(x, LRELU_SLOPE)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)
        x = torch.flatten(x, 1, -1)

        return x, fmap
|
162 |
+
|
163 |
+
|
164 |
+
class MultiPeriodDiscriminator(torch.nn.Module):
    """Bundle of ``DiscriminatorP`` instances with periods 2, 3, 5, 7 and 11."""

    def __init__(self):
        super(MultiPeriodDiscriminator, self).__init__()
        self.discriminators = nn.ModuleList(
            [DiscriminatorP(period) for period in (2, 3, 5, 7, 11)]
        )

    def forward(self, y, y_hat):
        """Run every sub-discriminator on ``y`` and on ``y_hat``.

        Returns four lists with one entry per sub-discriminator, in order:
        scores for ``y``, scores for ``y_hat``, feature maps for ``y`` and
        feature maps for ``y_hat``.
        """
        real_scores, fake_scores = [], []
        real_fmaps, fake_fmaps = [], []
        for disc in self.discriminators:
            score_r, maps_r = disc(y)
            score_g, maps_g = disc(y_hat)
            real_scores.append(score_r)
            real_fmaps.append(maps_r)
            fake_scores.append(score_g)
            fake_fmaps.append(maps_g)

        return real_scores, fake_scores, real_fmaps, fake_fmaps
|
189 |
+
|
190 |
+
|
191 |
+
class DiscriminatorS(torch.nn.Module):
    """1-D convolutional sub-discriminator that also returns its feature maps.

    Args:
        use_spectral_norm: Wrap layers in spectral norm instead of weight norm.
    """

    def __init__(self, use_spectral_norm=False):
        super(DiscriminatorS, self).__init__()
        if use_spectral_norm == False:  # noqa: E712 - keep original comparison
            norm_f = weight_norm
        else:
            norm_f = spectral_norm

        # (in_channels, out_channels, kernel_size, stride, groups, padding)
        layer_specs = (
            (1, 128, 15, 1, 1, 7),
            (128, 128, 41, 2, 4, 20),
            (128, 256, 41, 2, 16, 20),
            (256, 512, 41, 4, 16, 20),
            (512, 1024, 41, 4, 16, 20),
            (1024, 1024, 41, 1, 16, 20),
            (1024, 1024, 5, 1, 1, 2),
        )
        self.convs = nn.ModuleList([
            norm_f(Conv1d(c_in, c_out, kernel, stride, groups=groups, padding=pad))
            for c_in, c_out, kernel, stride, groups, pad in layer_specs
        ])
        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))

    def forward(self, x):
        """Return ``(scores, fmap)``.

        ``scores`` is the ``conv_post`` output flattened from dim 1 onwards;
        ``fmap`` lists the activation after every layer (including the
        ``conv_post`` output) in order.
        """
        fmap = []
        for conv in self.convs:
            x = F.leaky_relu(conv(x), LRELU_SLOPE)
            fmap.append(x)
        x = self.conv_post(x)
        fmap.append(x)

        return torch.flatten(x, 1, -1), fmap
|
217 |
+
|
218 |
+
|
219 |
+
class MultiScaleDiscriminator(torch.nn.Module):
    """Three ``DiscriminatorS`` instances applied to progressively pooled input.

    The first sub-discriminator uses spectral norm and sees the raw input;
    each following one sees the previous input after one more average pool.
    """

    def __init__(self):
        super(MultiScaleDiscriminator, self).__init__()
        self.discriminators = nn.ModuleList([
            DiscriminatorS(use_spectral_norm=True),
            DiscriminatorS(),
            DiscriminatorS(),
        ])
        self.meanpools = nn.ModuleList(
            [AvgPool1d(4, 2, padding=2) for _ in range(2)]
        )

    def forward(self, y, y_hat):
        """Run every sub-discriminator on ``y`` and on ``y_hat``.

        Returns four lists with one entry per sub-discriminator, in order:
        scores for ``y``, scores for ``y_hat``, feature maps for ``y`` and
        feature maps for ``y_hat``.
        """
        real_scores, fake_scores = [], []
        real_fmaps, fake_fmaps = [], []
        for idx, disc in enumerate(self.discriminators):
            if idx > 0:
                # Pooling is cumulative: both signals are overwritten in place.
                pool = self.meanpools[idx - 1]
                y = pool(y)
                y_hat = pool(y_hat)
            score_r, maps_r = disc(y)
            score_g, maps_g = disc(y_hat)
            real_scores.append(score_r)
            real_fmaps.append(maps_r)
            fake_scores.append(score_g)
            fake_fmaps.append(maps_g)

        return real_scores, fake_scores, real_fmaps, fake_fmaps
|
249 |
+
|
250 |
+
|
251 |
+
def feature_loss(fmap_r, fmap_g):
    """Return twice the summed mean-absolute difference between feature maps.

    ``fmap_r`` and ``fmap_g`` are parallel nested sequences (outer: one entry
    per discriminator, inner: one tensor per layer). Pairs are matched
    positionally with ``zip``.
    """
    total = 0
    for real_maps, fake_maps in zip(fmap_r, fmap_g):
        for real_map, fake_map in zip(real_maps, fake_maps):
            total += (real_map - fake_map).abs().mean()

    return total * 2
|
258 |
+
|
259 |
+
|
260 |
+
def discriminator_loss(disc_real_outputs, disc_generated_outputs):
    """Least-squares discriminator loss over paired real/generated outputs.

    For each pair, real outputs are pushed towards 1 (``mean((1 - dr)**2)``)
    and generated outputs towards 0 (``mean(dg**2)``).

    Returns:
        ``(loss, r_losses, g_losses)``: the summed loss, followed by the
        per-pair real and generated terms as Python floats.
    """
    total = 0
    real_terms, fake_terms = [], []
    for real_out, fake_out in zip(disc_real_outputs, disc_generated_outputs):
        real_term = torch.mean((1 - real_out) ** 2)
        fake_term = torch.mean(fake_out ** 2)
        total += (real_term + fake_term)
        real_terms.append(real_term.item())
        fake_terms.append(fake_term.item())

    return total, real_terms, fake_terms
|
272 |
+
|
273 |
+
|
274 |
+
def generator_loss(disc_outputs):
    """Least-squares generator loss: push each discriminator output towards 1.

    Returns:
        ``(loss, gen_losses)``: the summed loss and the list of per-output
        terms (each ``mean((1 - dg)**2)``, kept as tensors).
    """
    total = 0
    per_output = []
    for fake_out in disc_outputs:
        term = torch.mean((1 - fake_out) ** 2)
        per_output.append(term)
        total += term

    return total, per_output
|
283 |
+
|
hifigan/train.py
ADDED
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
warnings.simplefilter(action='ignore', category=FutureWarning)
|
3 |
+
import itertools
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
import argparse
|
7 |
+
import json
|
8 |
+
import torch
|
9 |
+
import torch.nn.functional as F
|
10 |
+
from torch.utils.tensorboard import SummaryWriter
|
11 |
+
from torch.utils.data import DistributedSampler, DataLoader
|
12 |
+
import torch.multiprocessing as mp
|
13 |
+
from torch.distributed import init_process_group
|
14 |
+
from torch.nn.parallel import DistributedDataParallel
|
15 |
+
from env import AttrDict, build_env
|
16 |
+
from hifigan.meldataset import MelDataset, mel_spectrogram, get_dataset_filelist
|
17 |
+
from hifigan.models import Generator, MultiPeriodDiscriminator, MultiScaleDiscriminator, feature_loss, generator_loss,\
|
18 |
+
discriminator_loss
|
19 |
+
from hifigan.utils import plot_spectrogram, scan_checkpoint, load_checkpoint, save_checkpoint
|
20 |
+
|
21 |
+
# Enable the cuDNN auto-tuner process-wide as an import-time side effect.
torch.backends.cudnn.benchmark = True
|
22 |
+
|
23 |
+
|
24 |
+
def train(rank, a, h):
    """Train the generator and both discriminators on one process / one GPU.

    Args:
        rank: Index of this process. It selects the CUDA device
            (``cuda:<rank>``) and is passed to ``init_process_group`` when
            more than one GPU is used. Only rank 0 prints, validates, writes
            summaries and saves checkpoints.
        a: Parsed command-line arguments (paths, intervals, epochs, flags).
        h: Hyper-parameter object read from the JSON config.
    """
    if h.num_gpus > 1:
        init_process_group(backend=h.dist_config['dist_backend'], init_method=h.dist_config['dist_url'],
                           world_size=h.dist_config['world_size'] * h.num_gpus, rank=rank)

    torch.cuda.manual_seed(h.seed)
    device = torch.device('cuda:{:d}'.format(rank))

    generator = Generator(h).to(device)
    mpd = MultiPeriodDiscriminator().to(device)
    msd = MultiScaleDiscriminator().to(device)

    if rank == 0:
        print(generator)
        os.makedirs(a.checkpoint_path, exist_ok=True)
        print("checkpoints directory : ", a.checkpoint_path)

    # Default to "no checkpoint" so the names are always bound, even when the
    # directory does not exist (e.g. a non-zero rank racing rank 0's makedirs).
    cp_g = None
    cp_do = None
    if os.path.isdir(a.checkpoint_path):
        cp_g = scan_checkpoint(a.checkpoint_path, 'g_')
        cp_do = scan_checkpoint(a.checkpoint_path, 'do_')

    steps = 0
    if cp_g is None or cp_do is None:
        state_dict_do = None
        last_epoch = -1
    else:
        state_dict_g = load_checkpoint(cp_g, device)
        state_dict_do = load_checkpoint(cp_do, device)
        generator.load_state_dict(state_dict_g['generator'])
        mpd.load_state_dict(state_dict_do['mpd'])
        msd.load_state_dict(state_dict_do['msd'])
        steps = state_dict_do['steps'] + 1
        last_epoch = state_dict_do['epoch']

    if h.num_gpus > 1:
        generator = DistributedDataParallel(generator, device_ids=[rank]).to(device)
        mpd = DistributedDataParallel(mpd, device_ids=[rank]).to(device)
        msd = DistributedDataParallel(msd, device_ids=[rank]).to(device)

    optim_g = torch.optim.AdamW(generator.parameters(), h.learning_rate, betas=[h.adam_b1, h.adam_b2])
    optim_d = torch.optim.AdamW(itertools.chain(msd.parameters(), mpd.parameters()),
                                h.learning_rate, betas=[h.adam_b1, h.adam_b2])

    if state_dict_do is not None:
        optim_g.load_state_dict(state_dict_do['optim_g'])
        optim_d.load_state_dict(state_dict_do['optim_d'])

    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=h.lr_decay, last_epoch=last_epoch)
    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=h.lr_decay, last_epoch=last_epoch)

    training_filelist, validation_filelist = get_dataset_filelist(a)

    trainset = MelDataset(training_filelist, h.segment_size, h.n_fft, h.num_mels,
                          h.hop_size, h.win_size, h.sampling_rate, h.fmin, h.fmax, n_cache_reuse=0,
                          shuffle=False if h.num_gpus > 1 else True, fmax_loss=h.fmax_for_loss, device=device,
                          fine_tuning=a.fine_tuning, base_mels_path=a.input_mels_dir)

    train_sampler = DistributedSampler(trainset) if h.num_gpus > 1 else None

    train_loader = DataLoader(trainset, num_workers=h.num_workers, shuffle=False,
                              sampler=train_sampler,
                              batch_size=h.batch_size,
                              pin_memory=True,
                              drop_last=True)

    if rank == 0:
        validset = MelDataset(validation_filelist, h.segment_size, h.n_fft, h.num_mels,
                              h.hop_size, h.win_size, h.sampling_rate, h.fmin, h.fmax, False, False, n_cache_reuse=0,
                              fmax_loss=h.fmax_for_loss, device=device, fine_tuning=a.fine_tuning,
                              base_mels_path=a.input_mels_dir)
        validation_loader = DataLoader(validset, num_workers=1, shuffle=False,
                                       sampler=None,
                                       batch_size=1,
                                       pin_memory=True,
                                       drop_last=True)

        sw = SummaryWriter(os.path.join(a.checkpoint_path, 'logs'))

    generator.train()
    mpd.train()
    msd.train()
    for epoch in range(max(0, last_epoch), a.training_epochs):
        if rank == 0:
            start = time.time()
            print("Epoch: {}".format(epoch+1))

        if h.num_gpus > 1:
            train_sampler.set_epoch(epoch)

        for i, batch in enumerate(train_loader):
            if rank == 0:
                start_b = time.time()
            x, y, _, y_mel = batch
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)
            y_mel = y_mel.to(device, non_blocking=True)
            y = y.unsqueeze(1)

            y_g_hat = generator(x)
            y_g_hat_mel = mel_spectrogram(y_g_hat.squeeze(1), h.n_fft, h.num_mels, h.sampling_rate, h.hop_size, h.win_size,
                                          h.fmin, h.fmax_for_loss)

            optim_d.zero_grad()

            # MPD (generator output detached: only the discriminators are updated here)
            y_df_hat_r, y_df_hat_g, _, _ = mpd(y, y_g_hat.detach())
            loss_disc_f, losses_disc_f_r, losses_disc_f_g = discriminator_loss(y_df_hat_r, y_df_hat_g)

            # MSD
            y_ds_hat_r, y_ds_hat_g, _, _ = msd(y, y_g_hat.detach())
            loss_disc_s, losses_disc_s_r, losses_disc_s_g = discriminator_loss(y_ds_hat_r, y_ds_hat_g)

            loss_disc_all = loss_disc_s + loss_disc_f

            loss_disc_all.backward()
            optim_d.step()

            # Generator
            optim_g.zero_grad()

            # L1 Mel-Spectrogram Loss
            loss_mel = F.l1_loss(y_mel, y_g_hat_mel) * 45

            y_df_hat_r, y_df_hat_g, fmap_f_r, fmap_f_g = mpd(y, y_g_hat)
            y_ds_hat_r, y_ds_hat_g, fmap_s_r, fmap_s_g = msd(y, y_g_hat)
            loss_fm_f = feature_loss(fmap_f_r, fmap_f_g)
            loss_fm_s = feature_loss(fmap_s_r, fmap_s_g)
            loss_gen_f, losses_gen_f = generator_loss(y_df_hat_g)
            loss_gen_s, losses_gen_s = generator_loss(y_ds_hat_g)
            loss_gen_all = loss_gen_s + loss_gen_f + loss_fm_s + loss_fm_f + loss_mel

            loss_gen_all.backward()
            optim_g.step()

            if rank == 0:
                log_stdout = steps % a.stdout_interval == 0
                log_summary = steps % a.summary_interval == 0

                # Compute the unweighted mel error whenever either logger needs
                # it; previously it was only bound inside the stdout branch, so
                # a summary step that was not also a stdout step hit an
                # unbound (or stale) value.
                if log_stdout or log_summary:
                    with torch.no_grad():
                        mel_error = F.l1_loss(y_mel, y_g_hat_mel).item()

                # STDOUT logging
                if log_stdout:
                    print('Steps : {:d}, Gen Loss Total : {:4.3f}, Mel-Spec. Error : {:4.3f}, s/b : {:4.3f}'.
                          format(steps, loss_gen_all, mel_error, time.time() - start_b))

                # checkpointing
                if steps % a.checkpoint_interval == 0 and steps != 0:
                    checkpoint_path = "{}/g_{:08d}".format(a.checkpoint_path, steps)
                    save_checkpoint(checkpoint_path,
                                    {'generator': (generator.module if h.num_gpus > 1 else generator).state_dict()})
                    checkpoint_path = "{}/do_{:08d}".format(a.checkpoint_path, steps)
                    save_checkpoint(checkpoint_path,
                                    {'mpd': (mpd.module if h.num_gpus > 1
                                             else mpd).state_dict(),
                                     'msd': (msd.module if h.num_gpus > 1
                                             else msd).state_dict(),
                                     'optim_g': optim_g.state_dict(), 'optim_d': optim_d.state_dict(), 'steps': steps,
                                     'epoch': epoch})

                # Tensorboard summary logging
                if log_summary:
                    sw.add_scalar("training/gen_loss_total", loss_gen_all, steps)
                    sw.add_scalar("training/mel_spec_error", mel_error, steps)

                # Validation
                if steps % a.validation_interval == 0:  # and steps != 0:
                    generator.eval()
                    torch.cuda.empty_cache()
                    val_err_tot = 0
                    with torch.no_grad():
                        for j, batch in enumerate(validation_loader):
                            x, y, _, y_mel = batch
                            y_g_hat = generator(x.to(device))
                            y_mel = y_mel.to(device, non_blocking=True)
                            y_g_hat_mel = mel_spectrogram(y_g_hat.squeeze(1), h.n_fft, h.num_mels, h.sampling_rate,
                                                          h.hop_size, h.win_size,
                                                          h.fmin, h.fmax_for_loss)
                            val_err_tot += F.l1_loss(y_mel, y_g_hat_mel).item()

                            # Only the first five validation items are logged as audio/figures.
                            if j <= 4:
                                if steps == 0:
                                    sw.add_audio('gt/y_{}'.format(j), y[0], steps, h.sampling_rate)
                                    sw.add_figure('gt/y_spec_{}'.format(j), plot_spectrogram(x[0]), steps)

                                sw.add_audio('generated/y_hat_{}'.format(j), y_g_hat[0], steps, h.sampling_rate)
                                y_hat_spec = mel_spectrogram(y_g_hat.squeeze(1), h.n_fft, h.num_mels,
                                                             h.sampling_rate, h.hop_size, h.win_size,
                                                             h.fmin, h.fmax)
                                sw.add_figure('generated/y_hat_spec_{}'.format(j),
                                              plot_spectrogram(y_hat_spec.squeeze(0).cpu().numpy()), steps)

                        val_err = val_err_tot / (j+1)
                        sw.add_scalar("validation/mel_spec_error", val_err, steps)

                    generator.train()

            steps += 1

        scheduler_g.step()
        scheduler_d.step()

        if rank == 0:
            print('Time taken for epoch {} is {} sec\n'.format(epoch + 1, int(time.time() - start)))

    if rank == 0:
        # Flush pending events and release the log file handle.
        sw.close()
|
225 |
+
|
226 |
+
|
227 |
+
def main():
    """Parse CLI arguments, load the JSON config and launch training.

    When more than one CUDA device is visible, one ``train`` process is
    spawned per device; otherwise ``train`` runs in the current process.
    """
    print('Initializing Training Process..')

    def _str2bool(value):
        # argparse's `type=bool` treats every non-empty string (including
        # "False") as True, so parse the usual spellings explicitly.
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '{}'".format(value))

    parser = argparse.ArgumentParser()

    parser.add_argument('--group_name', default=None)
    parser.add_argument('--input_wavs_dir', default='LJSpeech-1.1/wavs')
    parser.add_argument('--input_mels_dir', default='ft_dataset')
    parser.add_argument('--input_training_file', default='LJSpeech-1.1/training.txt')
    parser.add_argument('--input_validation_file', default='LJSpeech-1.1/validation.txt')
    parser.add_argument('--checkpoint_path', default='cp_hifigan')
    parser.add_argument('--config', default='')
    parser.add_argument('--training_epochs', default=3100, type=int)
    parser.add_argument('--stdout_interval', default=5, type=int)
    parser.add_argument('--checkpoint_interval', default=5000, type=int)
    parser.add_argument('--summary_interval', default=100, type=int)
    parser.add_argument('--validation_interval', default=1000, type=int)
    parser.add_argument('--fine_tuning', default=False, type=_str2bool)

    a = parser.parse_args()

    with open(a.config) as f:
        data = f.read()

    json_config = json.loads(data)
    h = AttrDict(json_config)
    build_env(a.config, 'config.json', a.checkpoint_path)

    torch.manual_seed(h.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(h.seed)
        h.num_gpus = torch.cuda.device_count()
        # The configured batch size is the global one; split it across GPUs.
        h.batch_size = int(h.batch_size / h.num_gpus)
        print('Batch size per GPU :', h.batch_size)

    if h.num_gpus > 1:
        mp.spawn(train, nprocs=h.num_gpus, args=(a, h,))
    else:
        train(0, a, h)
|
268 |
+
|
269 |
+
|
270 |
+
# Run training only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
hifigan/utils.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import glob
|
2 |
+
import os
|
3 |
+
import matplotlib
|
4 |
+
import torch
|
5 |
+
from torch.nn.utils import weight_norm
|
6 |
+
matplotlib.use("Agg")
|
7 |
+
import matplotlib.pylab as plt
|
8 |
+
|
9 |
+
|
10 |
+
def plot_spectrogram(spectrogram):
    """Draw ``spectrogram`` as an image with a colorbar and return the figure.

    The figure is rendered on its canvas and then closed in pyplot before
    being returned.
    """
    fig, ax = plt.subplots(figsize=(10, 2))
    image = ax.imshow(
        spectrogram,
        aspect="auto",
        origin="lower",
        interpolation='none',
    )
    plt.colorbar(image, ax=ax)

    # Render first, then close so pyplot does not keep accumulating figures.
    fig.canvas.draw()
    plt.close()

    return fig
|
20 |
+
|
21 |
+
|
22 |
+
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise ``m.weight`` from N(mean, std) if ``m`` is a conv layer.

    A module counts as a conv layer when its class name contains ``"Conv"``;
    any other module is left untouched.
    """
    if "Conv" in m.__class__.__name__:
        m.weight.data.normal_(mean, std)
|
26 |
+
|
27 |
+
|
28 |
+
def apply_weight_norm(m):
    """Wrap ``m`` in weight normalisation if its class name contains ``"Conv"``."""
    if "Conv" in m.__class__.__name__:
        weight_norm(m)
|
32 |
+
|
33 |
+
|
34 |
+
def get_padding(kernel_size, dilation=1):
    """Return ``(kernel_size - 1) * dilation / 2`` truncated to an int."""
    span = kernel_size * dilation - dilation
    return int(span / 2)
|
36 |
+
|
37 |
+
|
38 |
+
def load_checkpoint(filepath, device):
    """Load and return the checkpoint stored at ``filepath``.

    Args:
        filepath: Path of the checkpoint file.
        device: Passed to ``torch.load`` as ``map_location``.

    Raises:
        FileNotFoundError: If ``filepath`` is not an existing regular file.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.isfile(filepath):
        raise FileNotFoundError("Checkpoint file not found: '{}'".format(filepath))
    print("Loading '{}'".format(filepath))
    checkpoint_dict = torch.load(filepath, map_location=device)
    print("Complete.")
    return checkpoint_dict
|
44 |
+
|
45 |
+
|
46 |
+
def save_checkpoint(filepath, obj):
    """Serialise ``obj`` to ``filepath`` with ``torch.save``, logging progress."""
    print(f"Saving checkpoint to {filepath}")
    torch.save(obj, filepath)
    print("Complete.")
|
50 |
+
|
51 |
+
|
52 |
+
def scan_checkpoint(cp_dir, prefix):
    """Return the lexicographically last ``<prefix>????????`` path in ``cp_dir``.

    The pattern matches ``prefix`` followed by exactly eight characters.
    Returns ``None`` when nothing matches.
    """
    candidates = glob.glob(os.path.join(cp_dir, prefix + '????????'))
    if not candidates:
        return None
    return max(candidates)
|
58 |
+
|
hyper_parameters.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from text import symbols
|
2 |
+
|
3 |
+
# creating a python dictionary with all hyper parameters
|
4 |
+
|
5 |
+
# Single flat dictionary holding every hyper-parameter, grouped by the
# section comments below.
tacotron_params = {'filter_length': 1024,  # audio parameters:
                   'hop_length': 256,
                   'win_length': 1024,
                   'n_mel_channels': 80,
                   'mel_fmin': 0.0,
                   'mel_fmax': 8000.0,
                   'sampling_rate': 22050,
                   'max_wav_value': 32768.0,
                   'clipping_value': 1e-5,
                   'C': 1,
                   # dataset parameters:
                   'load_mel_from_disk': False,
                   'sort_by_length': False,
                   'text_cleaners': ['english_cleaners'],
                   # embedding parameters:
                   'symbols_embedding_length': 512,
                   'n_symbols': len(symbols),  # size of the symbol table imported from `text`
                   # encoder parameters:
                   'encoder_embedding_dim': 512,
                   'encoder_convs': 3,
                   'conv_kernel_size': 5,
                   'conv_stride': 1,
                   'conv_dilation': 1,
                   'w_init_gain': 'relu',
                   # decoder parameters:
                   'number_frames_step': 1,
                   'decoder_rnn_dim': 1024,
                   'prenet_dim': 256,
                   'max_decoder_steps': 1000,
                   'gate_threshold': 0.5,  # TODO: value needs to be reviewed
                   'p_attention_dropout': 0.1,
                   'p_decoder_dropout': 0.1,
                   # attention parameters:
                   'attention_rnn_dim': 1024,
                   'attention_dim': 128,
                   # location features parameters:
                   'attention_location_n_filters': 32,
                   'attention_location_kernel_size': 31,
                   # postnet parameters:
                   'postnet_embedding_dim': 512,
                   'postnet_kernel_size': 5,
                   'postnet_n_convolutions': 5,
                   # GST parameters:
                   'E': 512,
                   'token_num': 3,
                   'num_heads': 1,
                   'seq_ref_enc_filter_size': [3, 7, 11],  # phoneme, word/silence, utterance levels respectively
                   'ref_enc_out_channels': [8, 16, 16],
                   # optimization parameters:
                   'use_saved_learning_rate': True,
                   'batch_size': 32,  # previously 64; should be larger than, and an integer multiple of, the number of GPUs
                   'learning_rate': 1e-3,
                   'weight_decay': 1e-6,
                   'grad_clip_thresh': 1.0,
                   'mask_padding': False,
                   # experiment parameters:
                   'epochs': 300,  # previously tried: 160, 500
                   'iters_per_checkpoint': 1500,  # previously 1000; number of iterations between validations
                   'seed': 1234,
                   'dynamic_loss_scaling': True,  # TODO: verify this setting
                   'distributed_run': False,
                   'dist_backend': 'nccl',
                   # NOTE(review): this is a developer-local filesystem path with no URL
                   # scheme; it must be changed before enabling 'distributed_run'.
                   'dist_url': "/home/alex/PyTorch_TACOTRON_2/pycharm-tacotron2",
                   'cudnn_enabled': True,
                   'cudnn_benchmark': False,
                   'fp16_run': False}
|
logger.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
import torch.nn.functional as F
|
3 |
+
from tensorboardX import SummaryWriter
|
4 |
+
from plotting_utils import plot_alignment_to_numpy, plot_gst_scores_to_numpy, plot_spectrogram_to_numpy
|
5 |
+
from plotting_utils import plot_gate_outputs_to_numpy
|
6 |
+
|
7 |
+
|
8 |
+
class Tacotron2Logger(SummaryWriter):
    """``SummaryWriter`` with helpers for logging training and validation."""

    def __init__(self, logdir):
        super(Tacotron2Logger, self).__init__(logdir)

    def log_training(self, reduced_loss, grad_norm, learning_rate, duration,
                     iteration):
        """Write the four per-iteration training scalars."""
        scalars = (
            ("training.loss", reduced_loss),
            ("grad.norm", grad_norm),
            ("learning.rate", learning_rate),
            ("duration", duration),
        )
        for tag, value in scalars:
            self.add_scalar(tag, value, iteration)

    def log_validation(self, reduced_loss, model, y, y_pred, gst_scores, iteration):
        """Write the validation loss, parameter histograms and diagnostic images.

        One batch element is picked at random and its alignment, target and
        predicted mel, and gate target/prediction are plotted; the GST scores
        are plotted as given (transposed).
        """
        def as_numpy(tensor):
            return tensor.data.cpu().numpy()

        self.add_scalar("validation.loss", reduced_loss, iteration)
        _, mel_outputs, gate_outputs, alignments, _ = y_pred
        mel_targets, gate_targets = y

        # plot distribution of parameters
        for name, param in model.named_parameters():
            self.add_histogram(name.replace('.', '/'), as_numpy(param), iteration)

        # plot alignment, mel target and predicted, gate target and predicted
        idx = random.randint(0, alignments.size(0) - 1)

        images = (
            ("alignment", plot_alignment_to_numpy(as_numpy(alignments[idx]).T)),
            ("gst_scores", plot_gst_scores_to_numpy(as_numpy(gst_scores).T)),
            ("mel_target", plot_spectrogram_to_numpy(as_numpy(mel_targets[idx]))),
            ("mel_predicted", plot_spectrogram_to_numpy(as_numpy(mel_outputs[idx]))),
            ("gate", plot_gate_outputs_to_numpy(
                as_numpy(gate_targets[idx]),
                as_numpy(F.sigmoid(gate_outputs[idx])))),
        )
        for tag, image in images:
            self.add_image(tag, image, iteration)
|
loss_function.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch import nn
|
2 |
+
|
3 |
+
|
4 |
+
class Tacotron2Loss(nn.Module):
    """Sum of two mel MSE terms and a gate BCE-with-logits term."""

    def __init__(self):
        super(Tacotron2Loss, self).__init__()

    def forward(self, model_output, targets):
        """Return the total loss.

        Args:
            model_output: 5-tuple whose first three items are the decoder mel
                output, the post-net mel output and the gate logits.
            targets: Sequence whose first two items are the mel target and
                the gate target.
        """
        mel_target, gate_target = targets[0], targets[1]
        # Targets are constants: make sure no gradient flows into them.
        mel_target.requires_grad = False
        gate_target.requires_grad = False

        # Flatten gate target and prediction into matching (N, 1) columns.
        gate_target = gate_target.view(-1, 1)
        mel_out, mel_out_postnet, gate_out, _, _ = model_output
        gate_out = gate_out.view(-1, 1)

        # L2 loss on the mel before and after the post-net.
        mse = nn.MSELoss()
        mel_loss = mse(mel_out, mel_target) + mse(mel_out_postnet, mel_target)

        # Sigmoid + BCE fused into one op for numerical stability.
        gate_loss = nn.BCEWithLogitsLoss()(gate_out, gate_target)
        return mel_loss + gate_loss
|
loss_scaler.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
|
3 |
+
class LossScaler:
    """Static loss scaler: multiplies the loss by a fixed factor.

    Overflow checks always report ``False`` and ``update_scale`` is a no-op.
    """

    def __init__(self, scale=1):
        self.cur_scale = scale

    # `params` is a list / generator of torch.Variable
    def has_overflow(self, params):
        """Always ``False``: the static scaler never checks for overflow."""
        return False

    # `x` is a torch.Tensor
    @staticmethod
    def _has_inf_or_nan(x):
        """Always ``False``.

        Declared as a staticmethod so it can be called on the class or on an
        instance; without the decorator an instance call passed ``self`` as
        ``x`` and raised ``TypeError``.
        """
        return False

    # `overflow` is boolean indicating whether we overflowed in gradient
    def update_scale(self, overflow):
        """No-op: the scale is fixed."""
        pass

    @property
    def loss_scale(self):
        """Current loss scale factor."""
        return self.cur_scale

    def scale_gradient(self, module, grad_in, grad_out):
        """Return ``grad_in`` with every element multiplied by the loss scale."""
        return tuple(self.loss_scale * g for g in grad_in)

    def backward(self, loss):
        """Back-propagate ``loss`` multiplied by the loss scale."""
        scaled_loss = loss*self.loss_scale
        scaled_loss.backward()
|
30 |
+
|
31 |
+
class DynamicLossScaler:
    """Loss scaler that halves on overflow and periodically grows again.

    Args:
        init_scale: Initial loss scale.
        scale_factor: Factor by which the scale is divided on overflow and
            multiplied after ``scale_window`` iterations without overflow.
        scale_window: Number of iterations between scale increases, counted
            from the last overflow.
    """

    def __init__(self,
                 init_scale=2**32,
                 scale_factor=2.,
                 scale_window=1000):
        self.cur_scale = init_scale
        self.cur_iter = 0
        self.last_overflow_iter = -1
        self.scale_factor = scale_factor
        self.scale_window = scale_window

    # `params` is a list / generator of torch.Variable
    def has_overflow(self, params):
        """Return ``True`` if any parameter's gradient sums to inf or NaN."""
        for p in params:
            if p.grad is not None and DynamicLossScaler._has_inf_or_nan(p.grad.data):
                return True

        return False

    # `x` is a torch.Tensor
    @staticmethod
    def _has_inf_or_nan(x):
        """Return ``True`` if the float32 sum of ``x`` is +/-inf or NaN.

        Declared as a staticmethod so it works both on the class (as
        ``has_overflow`` calls it) and on an instance.
        """
        cpu_sum = float(x.float().sum())
        # `cpu_sum != cpu_sum` is the NaN test (NaN is never equal to itself).
        if cpu_sum == float('inf') or cpu_sum == -float('inf') or cpu_sum != cpu_sum:
            return True
        return False

    # `overflow` is boolean indicating whether we overflowed in gradient
    def update_scale(self, overflow):
        """Adjust the scale after an iteration and advance the counter."""
        if overflow:
            # Shrink, but never below 1.
            self.cur_scale = max(self.cur_scale/self.scale_factor, 1)
            self.last_overflow_iter = self.cur_iter
        else:
            if (self.cur_iter - self.last_overflow_iter) % self.scale_window == 0:
                self.cur_scale *= self.scale_factor
        self.cur_iter += 1

    @property
    def loss_scale(self):
        """Current loss scale factor."""
        return self.cur_scale

    def scale_gradient(self, module, grad_in, grad_out):
        """Return ``grad_in`` with every element multiplied by the loss scale."""
        return tuple(self.loss_scale * g for g in grad_in)

    def backward(self, loss):
        """Back-propagate ``loss`` multiplied by the loss scale."""
        scaled_loss = loss*self.loss_scale
        scaled_loss.backward()
|
models/checkpoint_78000.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eceff019d3759973f7450bedffdd684ecceadc8b51e931c50fb0e8b0d3c216e8
|
3 |
+
size 363947236
|
models/config.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"resblock": "1",
|
3 |
+
"num_gpus": 0,
|
4 |
+
"batch_size": 16,
|
5 |
+
"learning_rate": 0.0004,
|
6 |
+
"adam_b1": 0.8,
|
7 |
+
"adam_b2": 0.99,
|
8 |
+
"lr_decay": 0.999,
|
9 |
+
"seed": 1234,
|
10 |
+
|
11 |
+
"upsample_rates": [8,8,2,2],
|
12 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
13 |
+
"upsample_initial_channel": 512,
|
14 |
+
"resblock_kernel_sizes": [3,7,11],
|
15 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
16 |
+
"resblock_initial_channel": 256,
|
17 |
+
|
18 |
+
"segment_size": 8192,
|
19 |
+
"num_mels": 80,
|
20 |
+
"num_freq": 1025,
|
21 |
+
"n_fft": 1024,
|
22 |
+
"hop_size": 256,
|
23 |
+
"win_size": 1024,
|
24 |
+
|
25 |
+
"sampling_rate": 22050,
|
26 |
+
|
27 |
+
"fmin": 0,
|
28 |
+
"fmax": 8000,
|
29 |
+
"fmax_loss": null,
|
30 |
+
|
31 |
+
"num_workers": 4,
|
32 |
+
|
33 |
+
"dist_config": {
|
34 |
+
"dist_backend": "nccl",
|
35 |
+
"dist_url": "tcp://localhost:54321",
|
36 |
+
"world_size": 1
|
37 |
+
}
|
38 |
+
}
|
models/generator_v1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64aa99598e561596c69cb86f738890a8400fac97d367159ba6ee1bbb9e348cde
|
3 |
+
size 55788858
|
models/nvidia_tacotron2_LJ11_epoch6400.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06ead35b0169ee0c560dde9b3fe5fb8ccea2989ef9b7385487fbdd4dd22dda19
|
3 |
+
size 17090302
|
multiproc.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import time
|
2 |
+
import torch
|
3 |
+
import sys
|
4 |
+
import subprocess
|
5 |
+
|
6 |
+
# Launch one worker process per visible GPU, forwarding this script's own
# command-line arguments plus --n_gpus, --group_name and a per-worker --rank.
argslist = sys.argv[1:]
num_gpus = torch.cuda.device_count()
argslist.append('--n_gpus={}'.format(num_gpus))
job_id = time.strftime("%Y_%m_%d-%H%M%S")
argslist.append("--group_name=group_{}".format(job_id))

workers = []
for i in range(num_gpus):
    worker_args = argslist + ['--rank={}'.format(i)]
    # Rank 0 inherits this process's stdout; every other rank logs to a file.
    if i == 0:
        stdout = None
    else:
        stdout = open("logs/{}_GPU_{}.log".format(job_id, i), "w")
    print(worker_args)
    workers.append(subprocess.Popen([str(sys.executable)] + worker_args, stdout=stdout))

# Block until every worker has exited.
for p in workers:
    p.wait()
|
nn_layers.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
from torch import nn
|
3 |
+
from librosa.filters import mel as librosa_mel_fn
|
4 |
+
from stft import STFT
|
5 |
+
|
6 |
+
# Seed torch's global RNG at import time so the layer initialisations below
# are reproducible from run to run.
torch.manual_seed(1234)

# Lower bound applied to mel magnitudes before taking the log in
# TacotronSTFT.mel_spectrogram (avoids log(0)).
clip_val = 1e-5
# Scale factor multiplied in before the log in mel_spectrogram and divided
# out again in TacotronSTFT.spectral_de_normalize.
C = 1
|
10 |
+
|
11 |
+
|
12 |
+
class convolutional_module(nn.Module):
    """Thin wrapper around ``nn.Conv1d`` with Xavier-uniform weight init.

    When ``padding`` is omitted it is derived from ``kernel_size`` and
    ``dilation`` as ``int(dilation * (kernel_size - 1) / 2)``; in that case
    ``kernel_size`` must be odd (enforced with an ``assert``).

    ``w_init_gain`` is the non-linearity name handed to
    ``nn.init.calculate_gain`` to scale the Xavier initialisation.
    """

    def __init__(self, in_ch, out_ch, kernel_size=1, stride=1, padding=None, dilation=1, bias=True,
                 w_init_gain='linear'):
        super().__init__()

        if padding is None:
            assert kernel_size % 2 == 1
            padding = int(dilation * (kernel_size - 1) / 2)

        # Conv1d takes input shaped (minibatch, in_channels, width) and holds a
        # weight shaped (out_channels, in_channels / groups, kernel_width); no
        # grouping is used here. The original author's example: input
        # (48, 512, 189) with weights (512, 512, 5).
        self.conv_layer = nn.Conv1d(
            in_ch,
            out_ch,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias,
        )

        # Background on Xavier initialisation:
        # https://prateekvjoshi.com/2016/03/29/understanding-xavier-initialization-in-deep-neural-networks/
        init_gain = nn.init.calculate_gain(w_init_gain)
        nn.init.xavier_uniform_(self.conv_layer.weight, gain=init_gain)

    def forward(self, x):
        """Apply the wrapped convolution to ``x`` and return the result."""
        return self.conv_layer(x)
|
39 |
+
|
40 |
+
|
41 |
+
class linear_module(nn.Module):
    """Thin wrapper around ``nn.Linear`` (y = x A^T + b) with Xavier-uniform
    weight init.

    ``w_init_gain`` is the non-linearity name handed to
    ``nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super().__init__()
        self.linear_layer = nn.Linear(in_dim, out_dim, bias=bias)

        init_gain = nn.init.calculate_gain(w_init_gain)
        nn.init.xavier_uniform_(self.linear_layer.weight, gain=init_gain)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x`` and return the result."""
        projected = self.linear_layer(x)
        return projected
|
52 |
+
|
53 |
+
|
54 |
+
class location_layer(nn.Module):
    """Convolve the stacked attention weights, then project them.

    A bias-free 1-D convolution with 2 input channels and
    ``attention_n_filters`` output channels is followed by a bias-free linear
    projection to ``attention_dim`` (initialised with the 'tanh' gain).
    """

    def __init__(self, attention_n_filters, attention_kernel_size, attention_dim):
        super().__init__()
        same_padding = int((attention_kernel_size - 1) / 2)

        # Neither sub-layer trains a bias, which is a deliberate restriction.
        # The original author believes the 2 input channels correspond to k,
        # the number of vectors per encoder position taken from the previous
        # alignment -- TODO confirm against the attention caller.
        self.location_conv = convolutional_module(
            2,
            attention_n_filters,
            kernel_size=attention_kernel_size,
            padding=same_padding,
            bias=False,
            stride=1,
            dilation=1,
        )
        self.location_dense = linear_module(
            attention_n_filters,
            attention_dim,
            bias=False,
            w_init_gain='tanh',
        )

    def forward(self, attention_weights_cat):
        """Return the projected location features for ``attention_weights_cat``.

        The convolution output has its dimensions 1 and 2 swapped before the
        linear layer so the projection acts on the filter dimension.
        """
        conv_features = self.location_conv(attention_weights_cat)
        return self.location_dense(conv_features.transpose(1, 2))
|
71 |
+
|
72 |
+
|
73 |
+
class TacotronSTFT(nn.Module):
    """Compute log-compressed mel spectrograms from waveforms.

    Holds an ``STFT`` transform built from ``filter_length``, ``hop_length``
    and ``win_length``, plus a mel filterbank from ``librosa_mel_fn`` that is
    registered as the non-trainable buffer ``mel_basis``.
    """

    def __init__(self, filter_length=1024, hop_length=256, win_length=1024,
                 n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0,
                 mel_fmax=8000.0):
        super().__init__()
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.stft_fn = STFT(filter_length, hop_length, win_length)

        filterbank = librosa_mel_fn(
            sr=sampling_rate,
            n_fft=filter_length,
            n_mels=n_mel_channels,
            fmin=mel_fmin,
            fmax=mel_fmax,
        )
        # Stored as a buffer so it follows the module across devices without
        # being treated as a trainable parameter.
        self.register_buffer('mel_basis', torch.from_numpy(filterbank).float())

    def spectral_de_normalize(self, magnitudes):
        """Invert the log compression: return ``exp(magnitudes) / C``."""
        return torch.exp(magnitudes) / C

    def mel_spectrogram(self, y):
        """Compute mel spectrograms for a batch of waveforms.

        PARAMS
        ------
        y: torch.FloatTensor of shape (B, T) with values in [-1, 1]; the range
            is checked with ``assert``.

        RETURNS
        -------
        mel_output: torch.FloatTensor of shape (B, n_mel_channels, T)
        """
        samples = y.data
        assert samples.min() >= -1
        assert samples.max() <= 1

        # Only the magnitude half of the transform is used; phase is discarded.
        magnitudes, _ = self.stft_fn.transform(y)
        mel = torch.matmul(self.mel_basis, magnitudes.data)
        # Floor at clip_val before the log so silent bins do not give -inf.
        return torch.log(mel.clamp(min=clip_val) * C)
|
plotting_utils.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib
|
2 |
+
matplotlib.use("Agg")
|
3 |
+
import matplotlib.pylab as plt
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
|
7 |
+
def save_figure_to_numpy(fig):
    """Return the rendered pixels of ``fig`` as a writable uint8 RGB array.

    The figure's canvas must already have been drawn.

    Args:
        fig: object exposing ``fig.canvas`` with ``get_width_height()`` and
            either ``tostring_rgb()`` or ``buffer_rgba()``.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` and shape (height, width, 3).
    """
    canvas = fig.canvas
    if hasattr(canvas, "tostring_rgb"):
        width, height = canvas.get_width_height()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It yields a read-only view, so copy to keep the result writable as
        # before.
        flat = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
        return flat.reshape((height, width, 3)).copy()

    # Canvases without tostring_rgb: read the RGBA buffer and drop the alpha
    # channel.
    rgba = np.asarray(canvas.buffer_rgba())
    return rgba[..., :3].copy()
|
12 |
+
|
13 |
+
|
14 |
+
def plot_alignment_to_numpy(alignment, info=None):
    """Render an alignment matrix as a channel-first image array.

    Args:
        alignment: 2-D array-like handed to ``imshow``. The x axis is labelled
            'Decoder timestep' and the y axis 'Encoder timestep'.
        info: optional text appended below the x-axis label.

    Returns:
        The array from ``save_figure_to_numpy`` transposed with (2, 0, 1),
        i.e. channels first.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        im = ax.imshow(alignment, aspect='auto', origin='lower',
                       interpolation='none')
        fig.colorbar(im, ax=ax)
        xlabel = 'Decoder timestep'
        if info is not None:
            xlabel += '\n\n' + info
        # Label through the explicit handles rather than pyplot's implicit
        # "current axes" state.
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Encoder timestep')
        fig.tight_layout()

        fig.canvas.draw()
        data = save_figure_to_numpy(fig)
    finally:
        # Close this figure specifically, even if rendering failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    return data.transpose(2, 0, 1)
|
31 |
+
|
32 |
+
|
33 |
+
def plot_gst_scores_to_numpy(gst_scores, info=None):
    """Render a style-token score matrix as a channel-first image array.

    Args:
        gst_scores: 2-D array-like handed to ``imshow``. The x axis is
            labelled 'Validation samples' and the y axis 'Style Tokens'.
        info: optional text appended below the x-axis label.

    Returns:
        The array from ``save_figure_to_numpy`` transposed with (2, 0, 1),
        i.e. channels first.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        im = ax.imshow(gst_scores, aspect='auto', origin='lower',
                       interpolation='none')
        fig.colorbar(im, ax=ax)
        xlabel = 'Validation samples'
        if info is not None:
            xlabel += '\n\n' + info
        # Label through the explicit handles rather than pyplot's implicit
        # "current axes" state.
        ax.set_xlabel(xlabel)
        ax.set_ylabel('Style Tokens')
        fig.tight_layout()

        fig.canvas.draw()
        data = save_figure_to_numpy(fig)
    finally:
        # Close this figure specifically, even if rendering failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    return data.transpose(2, 0, 1)
|
50 |
+
|
51 |
+
|
52 |
+
def plot_spectrogram_to_numpy(spectrogram):
    """Render a spectrogram as a channel-first image array.

    Args:
        spectrogram: 2-D array-like handed to ``imshow``. The x axis is
            labelled 'Frames' and the y axis 'Channels'.

    Returns:
        The array from ``save_figure_to_numpy`` transposed with (2, 0, 1),
        i.e. channels first.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    try:
        im = ax.imshow(spectrogram, aspect="auto", origin="lower",
                       interpolation='none')
        fig.colorbar(im, ax=ax)
        # Label through the explicit handles rather than pyplot's implicit
        # "current axes" state.
        ax.set_xlabel("Frames")
        ax.set_ylabel("Channels")
        fig.tight_layout()

        fig.canvas.draw()
        data = save_figure_to_numpy(fig)
    finally:
        # Close this figure specifically, even if rendering failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    return data.transpose(2, 0, 1)
|
66 |
+
|
67 |
+
|
68 |
+
def plot_gate_outputs_to_numpy(gate_targets, gate_outputs):
    """Scatter-plot gate targets against predictions as a channel-first image.

    Args:
        gate_targets: sequence of target gate values, drawn as green '+'.
        gate_outputs: sequence of predicted gate values, drawn as red '.'.
            Each sequence is plotted against its own index.

    Returns:
        The array from ``save_figure_to_numpy`` transposed with (2, 0, 1),
        i.e. channels first.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    try:
        ax.scatter(range(len(gate_targets)), gate_targets, alpha=0.5,
                   color='green', marker='+', s=1, label='target')
        ax.scatter(range(len(gate_outputs)), gate_outputs, alpha=0.5,
                   color='red', marker='.', s=1, label='predicted')

        # Label through the explicit handles rather than pyplot's implicit
        # "current axes" state.
        ax.set_xlabel("Frames (Green target, Red predicted)")
        ax.set_ylabel("Gate State")
        fig.tight_layout()

        fig.canvas.draw()
        data = save_figure_to_numpy(fig)
    finally:
        # Close this figure specifically, even if rendering failed, so
        # repeated calls do not accumulate open figures.
        plt.close(fig)
    return data.transpose(2, 0, 1)
|