{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Install env deps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "268d87b8-54a2-4e09-bbef-ed28f93719f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# `%pip` installs into the environment of the running kernel, whereas `!pip3`\n",
    "# runs whichever pip is first on the shell PATH.\n",
    "%pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install numba numpy scikit-learn tqdm pynini datasets deep-phonemizer nemo-text-processing piq soundfile transformers unidecode tensorboard librosa gpustat chardet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d0662705-c8a6-4588-9435-07aecb004825",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install lightning"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add tensorboard to track the basic metrics and outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "089094c9-4b03-47a8-a500-50f15d9a8ff7",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext tensorboard"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34c26745-47fd-4ad8-9bfc-d3952a406b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Log directory shared by the TensorBoard UI below and by the Trainer/logger\n",
    "# created in the training cell.\n",
    "default_root_dir = \"logs/acoustic\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de8ed1db-2d23-4f34-abad-43d287a2ffd9",
   "metadata": {},
   "outputs": [],
   "source": [
    "%tensorboard --logdir {default_root_dir}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d26af70-b7ea-4e64-888c-106a984a3fdd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from lightning.pytorch import Trainer\n",
    "from lightning.pytorch.callbacks import StochasticWeightAveraging\n",
    "from lightning.pytorch.loggers import TensorBoardLogger\n",
    "from lightning.pytorch.tuner.tuning import Tuner\n",
    "\n",
    "from training.modules import AcousticModule, VocoderModule, AcousticDataModule\n",
    "\n",
    "# Debugging aid: makes CUDA kernel launches synchronous so an error is reported\n",
    "# at the call that caused it. It slows training; drop it for long runs.\n",
    "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Train the acoustic model\n",
    "\n",
    "Loads the pretrained vocoder and acoustic checkpoints, tunes the batch size and learning rate, then continues training with Stochastic Weight Averaging."
   ]
  },
  {
"cell_type": "code",
   "execution_count": null,
   "id": "9fdbeb42-0158-4c77-874b-6fd3f72e1219",
   "metadata": {},
   "outputs": [],
   "source": [
    "accelerator = \"cuda\"\n",
    "ckpt_acoustic = \"./checkpoints/am_pitche_stats_with_vocoder.ckpt\"\n",
    "ckpt_vocoder = \"./checkpoints/vocoder.ckpt\"\n",
    "\n",
    "# Run validation only once every N training epochs\n",
    "check_val_every_n_epoch = 10\n",
    "# Accumulate gradients over this many batches before each optimizer step\n",
    "accumulate_grad_batches = 5\n",
    "# Learning rate handed to the SWA callback\n",
    "swa_lrs = 1e-2\n",
    "\n",
    "# Stochastic Weight Averaging (SWA) can make your models generalize\n",
    "# better at virtually no additional cost.\n",
    "# This can be used with both non-trained and trained models.\n",
    "# The SWA procedure smooths the loss landscape thus making it\n",
    "# harder to end up in a local minimum during optimization.\n",
    "callbacks = [\n",
    "    StochasticWeightAveraging(swa_lrs=swa_lrs),\n",
    "    # TODO: Add EarlyStopping Callback\n",
    "]\n",
    "\n",
    "tensorboard = TensorBoardLogger(save_dir=default_root_dir)\n",
    "\n",
    "trainer = Trainer(\n",
    "    logger=tensorboard,\n",
    "    # Save checkpoints to the `default_root_dir` directory\n",
    "    default_root_dir=default_root_dir,\n",
    "    accelerator=accelerator,\n",
    "    check_val_every_n_epoch=check_val_every_n_epoch,\n",
    "    accumulate_grad_batches=accumulate_grad_batches,\n",
    "    # -1 disables the epoch limit: training runs until it is interrupted\n",
    "    max_epochs=-1,\n",
    "    callbacks=callbacks,\n",
    "    devices=2,\n",
    ")\n",
    "\n",
    "# Load the pretrained weights for the vocoder\n",
    "vocoder_module = VocoderModule.load_from_checkpoint(ckpt_vocoder)\n",
    "\n",
    "# Restore the acoustic model and hand it the vocoder loaded above\n",
    "module = AcousticModule.load_from_checkpoint(\n",
    "    ckpt_acoustic,\n",
    "    vocoder_module=vocoder_module,\n",
    ")\n",
    "\n",
    "# The initial batch size comes from the training config stored on the module\n",
    "datamodule = AcousticDataModule(batch_size=module.train_config.batch_size)\n",
    "\n",
    "# Create a Tuner\n",
    "tuner = Tuner(trainer)\n",
    "\n",
    "# Search for a batch size first so the learning-rate search below runs at the\n",
    "# batch size that will actually be used for training.\n",
    "# NOTE(review): Lightning documents the batch size finder as unsupported with\n",
    "# DDP; with `devices=2` this call may raise - confirm, or tune on one device.\n",
    "tuner.scale_batch_size(module, datamodule=datamodule)\n",
    "\n",
    "# finds learning rate automatically\n",
    "# sets hparams.lr or hparams.learning_rate to that learning rate\n",
    "tuner.lr_find(module, datamodule=datamodule)\n",
    "\n",
    "# Train on the same datamodule that was tuned, so the batch size found above\n",
    "# is the one that is used.\n",
    "trainer.fit(model=module, datamodule=datamodule)\n"
   ]
  }
 ],
 "metadata": {
  "environment": {
   "kernel": "conda-env-tts_framework-py",
   "name": "workbench-notebooks.m111",
   "type": "gcloud",
   "uri": "gcr.io/deeplearning-platform-release/workbench-notebooks:m111"
  },
  "kernelspec": {
   "display_name": "tts_framework",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}