File size: 6,657 Bytes
8235b4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

# Authors: Yossi Adi (adiyoss)

import argparse
from concurrent.futures import ProcessPoolExecutor
import json
import logging
import sys

import numpy as np
from pesq import pesq
from pystoi import stoi
import torch

from .models.sisnr_loss import cal_loss
from .data.data import Validset
from . import distrib
from .utils import bold, deserialize_model, LogProgress
from .evaluate import _run_metrics


logger = logging.getLogger(__name__)

# Command-line interface of the auto-selection evaluation script.
# The text is passed as `description=`: the first positional parameter of
# ArgumentParser is `prog`, so passing it positionally would make it show up
# as the program name in the usage line instead of as the description.
parser = argparse.ArgumentParser(
    description='Evaluate model automatic selection performance')
# One checkpoint per supported speaker count (2 to 5 speakers).
parser.add_argument('model_path_2spk',
                    help='Path to 2spk model file created by training')
parser.add_argument('model_path_3spk',
                    help='Path to 3spk model file created by training')
parser.add_argument('model_path_4spk',
                    help='Path to 4spk model file created by training')
parser.add_argument('model_path_5spk',
                    help='Path to 5spk model file created by training')
parser.add_argument(
    'data_dir', help='directory including mix.json, s1.json and s2.json files')
parser.add_argument('--device', default="cuda",
                    help='Torch device the models and batches are moved to')
parser.add_argument('--sample_rate', default=8000,
                    type=int, help='Sample rate')
parser.add_argument('--thresh', default=0.001,
                    type=float, help='Threshold for model auto selection')
parser.add_argument('--num_workers', type=int, default=5,
                    help='Number of data loader workers and metric processes')
# `-v` stores a logging level directly so it can be handed to basicConfig.
parser.add_argument('-v', '--verbose', action='store_const', const=logging.DEBUG,
                    default=logging.INFO, help="More logging")



# test pairwise matching
def pair_wise(padded_source, estimate_source):
    """Pick, for every reference channel, the best-correlated estimated channel.

    Both inputs are indexed as rank-3 tensors whose dim 1 is the channel
    count (presumably ``(batch, channels, time)`` -- TODO confirm with the
    data loader).

    Args:
        padded_source: reference tensor; its dim-1 size defines how many
            channels the result has.
        estimate_source: model output; may have a different dim-1 size.

    Returns:
        ``estimate_source`` itself when both channel counts already agree.
        Otherwise a tensor with one channel per reference channel, where each
        channel is the estimated channel with the largest inner product with
        that reference. The result keeps the device and dtype of
        ``estimate_source`` (no hard-coded CUDA transfer).
    """
    if estimate_source.shape[1] == padded_source.shape[1]:
        return estimate_source

    # scores[b, k, c] = <estimate_source[b, k], padded_source[b, c]>
    scores = torch.sum(
        padded_source.unsqueeze(1) * estimate_source.unsqueeze(2), dim=3)
    # For each reference channel c, index of the best estimated channel k.
    best = scores.argmax(dim=1)
    # Expand the indices along the last dim so gather can pick whole channels.
    index = best.unsqueeze(-1).expand(-1, -1, estimate_source.shape[-1])
    return torch.gather(estimate_source, 1, index)


def _load_models(paths, device):
    """Load each checkpoint in `paths`, put it in eval mode and move it to `device`."""
    models = []
    for path in paths:
        # NOTE(review): torch.load unpickles the file, so only trusted
        # checkpoints should be passed on the command line.
        # map_location lets checkpoints saved on a GPU load on CPU-only hosts.
        pkg = torch.load(path, map_location=device)
        model = deserialize_model(pkg['model'] if 'model' in pkg else pkg)
        if 'best_state' in pkg:
            model.load_state_dict(pkg['best_state'])
        logger.debug(model)

        model.eval()
        model.to(device)
        models.append(model)
    return models


def _select_model_index(estimated_sources, thresh, max_spk, mix_spk):
    """Return the index into `estimated_sources` chosen by counting active channels."""
    ground = max_spk - mix_spk
    selected_idx = 0
    while selected_idx <= ground:
        estimate = estimated_sources[selected_idx]
        # Mean energy per channel after scaling by the global peak amplitude.
        vals = torch.mean((estimate / torch.abs(estimate).max()) ** 2, dim=2)
        # A channel counts as active when its energy exceeds the threshold.
        num_active = int((vals > thresh).sum())
        new_selected_idx = max_spk - num_active
        if new_selected_idx == selected_idx:
            break
        selected_idx = new_selected_idx
    # Clamp to the range of available models.
    return min(max(selected_idx, 0), ground)


def evaluate_auto_select(args):
    """Evaluate separation quality when the model is picked automatically.

    Every sample is run through all four models. The model whose number of
    active output channels is self-consistent (see `_select_model_index`) is
    selected, and its reordered estimate is scored with `_run_metrics` in a
    process pool.

    Args:
        args: parsed command-line namespace; reads `model_path_2spk` ..
            `model_path_5spk`, `data_dir`, `device`, `sample_rate`, `thresh`
            and `num_workers`.

    Returns:
        The `(sisnr, pesq, stoi)` triple produced by `distrib.average` over
        the per-sample averages.
    """
    total_sisnr = 0
    total_pesq = 0
    total_stoi = 0
    total_cnt = 0
    updates = 5
    max_spk = 5
    mix_spk = 2
    ground = max_spk - mix_spk

    models = _load_models(
        [args.model_path_2spk, args.model_path_3spk,
         args.model_path_4spk, args.model_path_5spk],
        args.device)

    # Load data
    dataset = Validset(args.data_dir)
    data_loader = distrib.loader(
        dataset, batch_size=1, num_workers=args.num_workers)
    sr = args.sample_rate
    # Histogram of selections: slot 0 is the 2-speaker model, slot 3 the 5-speaker one.
    y_hat = torch.zeros(ground + 1)

    pendings = []
    with ProcessPoolExecutor(args.num_workers) as pool:
        with torch.no_grad():
            iterator = LogProgress(logger, data_loader, name="Eval estimates")
            for data in iterator:
                # Get batch data
                mixture, lengths, sources = [x.to(args.device) for x in data]
                estimated_sources = []
                reorder_estimated_sources = []

                for model in models:
                    # Forward; only the last element of the model output is used.
                    raw_estimate = model(mixture)[-1]

                    estimate = pair_wise(sources, raw_estimate)
                    _, _, _, reorder_estimate = cal_loss(
                        sources, estimate, lengths)
                    # Prepending reverses the order: index 0 ends up being
                    # the last model loaded (the 5-speaker checkpoint).
                    estimated_sources.insert(0, raw_estimate)
                    reorder_estimated_sources.insert(0, reorder_estimate)

                # =================== DETECT NUM. NON-ACTIVE CHANNELS ============== #
                selected_idx = _select_model_index(
                    estimated_sources, args.thresh, max_spk, mix_spk)

                y_hat[ground - selected_idx] += 1
                # Tensors are moved to CPU before being sent to worker processes.
                reorder_estimate = reorder_estimated_sources[selected_idx].cpu()
                sources = sources.cpu()
                mixture = mixture.cpu()

                pendings.append(
                    pool.submit(_run_metrics, sources, reorder_estimate, mixture, None,
                                sr=sr))
                total_cnt += sources.shape[0]

            for pending in LogProgress(logger, pendings, updates, name="Eval metrics"):
                sisnr_i, pesq_i, stoi_i = pending.result()
                total_sisnr += sisnr_i
                total_pesq += pesq_i
                total_stoi += stoi_i

    metrics = [total_sisnr, total_pesq, total_stoi]
    # Local names avoid shadowing the imported `pesq` / `stoi` functions.
    sisnr, avg_pesq, avg_stoi = distrib.average(
        [m/total_cnt for m in metrics], total_cnt)
    logger.info(bold(f'Test set performance: SISNRi={sisnr:.2f} '
                     f'PESQ={avg_pesq}, STOI={avg_stoi}.'))
    logger.info(f'Two spks prob: {y_hat[0]/(total_cnt)}')
    logger.info(f'Three spks prob: {y_hat[1]/(total_cnt)}')
    logger.info(f'Four spks prob: {y_hat[2]/(total_cnt)}')
    logger.info(f'Five spks prob: {y_hat[3]/(total_cnt)}')
    return sisnr, avg_pesq, avg_stoi


def main():
    """Parse the command line, run the evaluation and print the scores.

    Logging goes to stderr at the level chosen by `-v`; the resulting
    metrics are written to stdout as a single JSON object followed by a
    newline.
    """
    cli_args = parser.parse_args()
    logging.basicConfig(level=cli_args.verbose, stream=sys.stderr)
    logger.debug(cli_args)

    avg_sisnr, avg_pesq, avg_stoi = evaluate_auto_select(cli_args)
    summary = {
        'sisnr': avg_sisnr,
        'pesq': avg_pesq,
        'stoi': avg_stoi,
    }
    json.dump(summary, sys.stdout)
    sys.stdout.write('\n')


# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()