Update README.md
Browse files
README.md
CHANGED
@@ -23,6 +23,16 @@ tags:
|
|
23 |
|
24 |
ASR model + pitch aware relative positional embeddings.
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
<img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
|
27 |
|
28 |
<img width="233" height="77" alt="legend" src="https://github.com/user-attachments/assets/fad84550-a199-43b3-8471-d011a9fd6f94" />
|
|
|
23 |
|
24 |
ASR model + pitch aware relative positional embeddings.
|
25 |
|
26 |
+
### Significantly decreases WER compared to standard inverse frequencies. 'eval_wer': 35.3
|
27 |
+
|
28 |
+
def _compute_freqs_base(self):
|
29 |
+
mel_scale = torch.pow(10, torch.linspace(0, 2595 * torch.log10(torch.tensor(1 + 4000/200)), self.head_dim // 2, device=device, dtype=dtype) / 2595) - 1
|
30 |
+
return 200 * mel_scale / 1000
|
31 |
+
|
32 |
+
### Standard inverse frequencies: 'eval_wer': 61.6
|
33 |
+
freqs = 1.0 / (self.theta ** (torch.arange(0, self.head_dim, 2, device=device, dtype=dtype) / (self.head_dim // 2)))
|
34 |
+
|
35 |
+
|
36 |
<img width="1363" height="732" alt="pitch_spectrogram" src="https://github.com/user-attachments/assets/ceb65e94-7df4-41b7-aa3d-c4aa4c6c0717" />
|
37 |
|
38 |
<img width="233" height="77" alt="legend" src="https://github.com/user-attachments/assets/fad84550-a199-43b3-8471-d011a9fd6f94" />
|