Spaces:

mkutarna
/

audiobook_gen

Build error

mkutarna committed on Dec 9, 2022

Commit

82b2b89

1 Parent(s): fd88500

Switch audio output to mp3 rather than wav

Files changed (3) hide show

notebooks/audiobook_gen_silero.ipynb CHANGED Viewed

@@ -229,7 +229,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ebook[0][0]"
    ]
   },
   {
@@ -249,7 +249,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#os.mkdir(f'outputs/{title}')\n",
     "\n",
     "for chapter in tqdm(ebook[0:3]):\n",
     "    chapter_index = f'chapter{ebook.index(chapter):03}'\n",
@@ -263,11 +263,11 @@
     "        else:\n",
     "            print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
     "\n",
-    "    sample_path = f'outputs/{title}/{chapter_index}.wav'\n",
     "\n",
     "    if len(audio_list) > 0:\n",
     "        audio_file = torch.cat(audio_list).reshape(1, -1)\n",
-    "#         torchaudio.save(sample_path, audio_file, sample_rate)\n",
     "    else:\n",
     "        print(f'Chapter {chapter_index} is empty.')"
    ]
@@ -313,7 +313,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },

    "metadata": {},
    "outputs": [],
    "source": [
+    "ebook[0:3]"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "os.mkdir(f'outputs/{title}')\n",
     "\n",
     "for chapter in tqdm(ebook[0:3]):\n",
     "    chapter_index = f'chapter{ebook.index(chapter):03}'\n",
     "        else:\n",
     "            print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
     "\n",
+    "    sample_path = f'outputs/{title}/{chapter_index}.mp3'\n",
     "\n",
     "    if len(audio_list) > 0:\n",
     "        audio_file = torch.cat(audio_list).reshape(1, -1)\n",
+    "        torchaudio.save(sample_path, audio_file, sample_rate, format=\"mp3\")\n",
     "    else:\n",
     "        print(f'Chapter {chapter_index} is empty.')"
    ]
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },

src/output.py CHANGED Viewed

@@ -35,7 +35,7 @@ def write_audio(audio_list, sample_path):
     if len(audio_list) > 0:
         audio_file = torch.cat(audio_list).reshape(1, -1)
-        torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE)
         logging.info(f'Audio generated at: {sample_path}')
     else:
         logging.info(f'Audio at: {sample_path} is empty.')
@@ -67,7 +67,7 @@ def assemble_zip(title):
     with zipfile.ZipFile(zip_name, mode="w") as archive:
         for file_path in stqdm(config.output_path.iterdir()):
-            if file_path.suffix == '.wav':
                 archive.write(file_path, arcname=file_path.name)
                 file_path.unlink()

     if len(audio_list) > 0:
         audio_file = torch.cat(audio_list).reshape(1, -1)
+        torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE, format="mp3")
         logging.info(f'Audio generated at: {sample_path}')
     else:
         logging.info(f'Audio at: {sample_path} is empty.')
     with zipfile.ZipFile(zip_name, mode="w") as archive:
         for file_path in stqdm(config.output_path.iterdir()):
+            if file_path.suffix == '.mp3':
                 archive.write(file_path, arcname=file_path.name)
                 file_path.unlink()

src/predict.py CHANGED Viewed

@@ -106,5 +106,5 @@ def predict(text_section, section_index, title, model, speaker):
         else:
             logging.info(f'Tensor for sentence is not valid: \n {sentence}')
-    sample_path = config.output_path / f'{title}_{section_index}.wav'
     return audio_list, sample_path

         else:
             logging.info(f'Tensor for sentence is not valid: \n {sentence}')
+    sample_path = config.output_path / f'{title}_{section_index}.mp3'
     return audio_list, sample_path