diff --git "a/EDA.ipynb" "b/EDA.ipynb" new file mode 100644--- /dev/null +++ "b/EDA.ipynb" @@ -0,0 +1,423 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "f800718e-c29f-44d8-bf41-e02d50d0f730", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-26T20:30:02.452497923Z", + "start_time": "2023-04-26T20:30:02.439301900Z" + } + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from pathlib import Path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "dataset_path = Path('/media/ic/datasets/denoising/DS_10283_2791')\n", + "clean_path = dataset_path / 'clean_testset_wav'\n", + "noisy_path = dataset_path / 'noisy_testset_wav'" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f236e6df-2e29-4100-9549-8566a1dc1307", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-26T20:30:04.839139680Z", + "start_time": "2023-04-26T20:30:04.790014498Z" + } + }, + "outputs": [], + "source": [ + "clean_wavs = sorted(clean_path.glob(\"*\"))  # sorted: glob() order is not guaranteed\n", + "noisy_wavs = sorted(noisy_path.glob(\"*\"))  # index i pairs with clean_wavs[i]; assumes matching file names" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "023c655d-2515-4f29-ba87-1c17d87acf97", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-26T20:30:05.573748442Z", + "start_time": "2023-04-26T20:30:05.544662854Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(824, 824)" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(clean_wavs), len(noisy_wavs)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "f45674a2-586e-49e0-85c4-2abdc9f27697", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-26T20:30:07.016766545Z", + "start_time": "2023-04-26T20:30:06.992794443Z" + } + }, + "outputs": [], + "source": [ + "from IPython.display import Audio, display" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": 
"7303c87b-ffc2-4203-93e1-0d5ccde3d553", + "metadata": { + "ExecuteTime": { + "end_time": "2023-04-26T20:30:07.683999610Z", + "start_time": "2023-04-26T20:30:07.590614325Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "", + "text/html": "\n \n " + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "", + "text/html": "\n \n " + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def display_pair(i):\n", + " display(Audio(noisy_wavs[i], rate=48000))\n", + " display(Audio(clean_wavs[i], rate=48000))\n", + "display_pair(-1)" + ] + }, + { + "cell_type": "markdown", + "id": "b3a93ce0-aa4a-416a-a8d7-398dbd19236b", + "metadata": {}, + "source": [ + "- SDR https://torchmetrics.readthedocs.io/en/stable/audio/signal_distortion_ratio.html\n", + "- SI-SNR https://torchmetrics.readthedocs.io/en/stable/audio/scale_invariant_signal_noise_ratio.html?highlight=Si-SNR" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "37404b32-dc25-4c70-8aca-6849c1a611bf", + "metadata": {}, + "outputs": [], + "source": [ + "from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality\n", + "from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility\n", + "import torch\n", + "import torchaudio\n", + "\n", + "from denoisers.SpectralGating import SpectralGating\n", + "\n", + "\n", + "class Metrics:\n", + " def __init__(self, rate=16000):\n", + " self.nb_pesq = PerceptualEvaluationSpeechQuality(rate, 'wb')\n", + " self.stoi = ShortTimeObjectiveIntelligibility(rate, False)\n", + " def calculate(self, preds, target):\n", + " return {'PESQ': self.nb_pesq(preds, target), \n", + " 'STOI': self.stoi(preds, target)}\n", + "\n", + "def load_wav(path):\n", + " wav, org_sr = torchaudio.load(path)\n", + " wav = torchaudio.functional.resample(wav, orig_freq=org_sr, new_freq=16000)\n", + " return wav\n", + "\n", + "\n", + "\n", + "\n", + "model = SpectralGating()\n", + "metrics = Metrics()\n", + "\n", + 
"clean_wav = load_wav(clean_wavs[0])\n", + "noisy_wav = load_wav(noisy_wavs[0])\n", + "denoised = model(noisy_wav)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "af9d9987-19dd-498e-8f83-6601bca17013", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PESQ': tensor(1.2861), 'STOI': tensor(0.9472)}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.calculate(noisy_wav, clean_wav)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "15d7cb6e-951a-42dd-ae23-1838bcdcbd77", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PESQ': tensor(1.5215), 'STOI': tensor(0.9407)}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics.calculate(denoised, clean_wav)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "abeea748-a9c4-4f1c-97f5-66b441136e52", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "10it [00:02, 3.75it/s]\n" + ] + } + ], + "source": [ + "from tqdm import tqdm\n", + "mean_scores_ideal = {'PESQ': 0,'STOI': 0}\n", + "mean_scores_model = {'PESQ': 0, 'STOI': 0}\n", + "\n", + "for clean_path, noisy_path in tqdm(zip(clean_wavs[:10], noisy_wavs[:10])):\n", + " clean_wav = load_wav(clean_path)\n", + " noisy_wav = load_wav(noisy_path)\n", + " denoised_wav = model(noisy_wav)\n", + " \n", + " scores_ideal = metrics.calculate(noisy_wav, clean_wav)\n", + " scores_model = metrics.calculate(noisy_wav, denoised_wav)\n", + " \n", + " mean_scores_ideal['PESQ'] += scores_ideal['PESQ']\n", + " mean_scores_ideal['STOI'] += scores_ideal['STOI']\n", + " \n", + " mean_scores_model['PESQ'] += scores_model['PESQ']\n", + " mean_scores_model['STOI'] += scores_model['STOI']\n", + "\n", + "mean_scores_ideal['PESQ'] = mean_scores_ideal['PESQ'] / len(clean_wavs)\n", + "mean_scores_ideal['STOI'] = 
mean_scores_ideal['STOI'] / len(clean_wavs)\n", + "mean_scores_model['PESQ'] = mean_scores_model['PESQ'] / len(clean_wavs)\n", + "mean_scores_model['STOI'] = mean_scores_model['STOI'] / len(clean_wavs)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f0eac478-9a2d-4820-a0ef-37a6d28025e0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PESQ': tensor(0.0215), 'STOI': tensor(0.0110)}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_scores_ideal" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "42651dba-fa5b-461f-acc3-5c226cdb355b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'PESQ': tensor(0.0207), 'STOI': tensor(0.0116)}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mean_scores_model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b60c40dd-7244-4ef3-be6c-e16df51e2e17", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "17bf893d-6468-48d7-902b-c160426a6067", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def inference(i):\n", + " prediction = model(noisy_wavs[i])\n", + " display(Audio(noisy_wavs[i],rate=48000))\n", + " display(Audio(clean_wavs[i],rate=48000))\n", + " display(Audio(prediction,rate=48000))\n", + " \n", + "inference(5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"66c821ca-8c64-43d4-b1f0-6c01801ae6b1", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ac750ea-709a-4a2f-a76f-0940861ab099", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fc4220f-6c18-4fa7-8c60-13a55a785a0c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ad6beb-0258-40b8-9e0e-3f6aaec6fdae", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}