warisqr7
/

accent-id-commonaccent_xlsr-en-english

Audio Classification

Accent Identification

Model card Files Files and versions Community

warisqr7 committed on Aug 14

Commit

93fa313

•

1 Parent(s): 3103d3b

Update custom_interface.py

Files changed (1) hide show

custom_interface.py +33 -0

custom_interface.py CHANGED Viewed

@@ -152,6 +152,39 @@ class CustomEncoderWav2vec2Classifier(Pretrained):
         text_lab = self.hparams.label_encoder.decode_torch(index)
         return out_prob, score, index, text_lab
     def forward(self, wavs, wav_lens=None, normalize=False):
         return self.encode_batch(
             wavs=wavs, wav_lens=wav_lens, normalize=normalize

         text_lab = self.hparams.label_encoder.decode_torch(index)
         return out_prob, score, index, text_lab
+    def classify_sample(self, sample, sr):
+        """Classifies the given audio sample into the given set of labels.
+        Arguments
+        ---------
+        sample : torch tensor
+            wav tensor. ([T, 1])
+        sr: int
+            sampling rate.
+        Returns
+        -------
+        out_prob
+            The log posterior probabilities of each class ([batch, N_class])
+        score:
+            It is the value of the log-posterior for the best class ([batch,])
+        index
+            The indexes of the best class ([batch,])
+        text_lab:
+            List with the text labels corresponding to the indexes.
+            (label encoder should be provided).
+        """
+        # Fake a batch:
+        waveform = self.audio_normalizer(sample, sr)
+        batch = waveform.unsqueeze(0)
+        rel_length = torch.tensor([1.0])
+        outputs = self.encode_batch(batch, rel_length)
+        outputs = self.mods.output_mlp(outputs).squeeze(1)
+        out_prob = self.hparams.softmax(outputs)
+        score, index = torch.max(out_prob, dim=-1)
+        text_lab = self.hparams.label_encoder.decode_torch(index)
+        return out_prob, score, index, text_lab
     def forward(self, wavs, wav_lens=None, normalize=False):
         return self.encode_batch(
             wavs=wavs, wav_lens=wav_lens, normalize=normalize