mkutarna committed on
Commit
82b2b89
·
1 Parent(s): fd88500

Switch audio output to mp3 rather than wav

Browse files
notebooks/audiobook_gen_silero.ipynb CHANGED
@@ -229,7 +229,7 @@
229
  "metadata": {},
230
  "outputs": [],
231
  "source": [
232
- "ebook[0][0]"
233
  ]
234
  },
235
  {
@@ -249,7 +249,7 @@
249
  "metadata": {},
250
  "outputs": [],
251
  "source": [
252
- "#os.mkdir(f'outputs/{title}')\n",
253
  "\n",
254
  "for chapter in tqdm(ebook[0:3]):\n",
255
  " chapter_index = f'chapter{ebook.index(chapter):03}'\n",
@@ -263,11 +263,11 @@
263
  " else:\n",
264
  " print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
265
  "\n",
266
- " sample_path = f'outputs/{title}/{chapter_index}.wav'\n",
267
  "\n",
268
  " if len(audio_list) > 0:\n",
269
  " audio_file = torch.cat(audio_list).reshape(1, -1)\n",
270
- "# torchaudio.save(sample_path, audio_file, sample_rate)\n",
271
  " else:\n",
272
  " print(f'Chapter {chapter_index} is empty.')"
273
  ]
@@ -313,7 +313,7 @@
313
  ],
314
  "metadata": {
315
  "kernelspec": {
316
- "display_name": "Python 3",
317
  "language": "python",
318
  "name": "python3"
319
  },
 
229
  "metadata": {},
230
  "outputs": [],
231
  "source": [
232
+ "ebook[0:3]"
233
  ]
234
  },
235
  {
 
249
  "metadata": {},
250
  "outputs": [],
251
  "source": [
252
+ "os.mkdir(f'outputs/{title}')\n",
253
  "\n",
254
  "for chapter in tqdm(ebook[0:3]):\n",
255
  " chapter_index = f'chapter{ebook.index(chapter):03}'\n",
 
263
  " else:\n",
264
  " print(f'Tensor for sentence is not valid: \\n {sentence}')\n",
265
  "\n",
266
+ " sample_path = f'outputs/{title}/{chapter_index}.mp3'\n",
267
  "\n",
268
  " if len(audio_list) > 0:\n",
269
  " audio_file = torch.cat(audio_list).reshape(1, -1)\n",
270
+ " torchaudio.save(sample_path, audio_file, sample_rate, format=\"mp3\")\n",
271
  " else:\n",
272
  " print(f'Chapter {chapter_index} is empty.')"
273
  ]
 
313
  ],
314
  "metadata": {
315
  "kernelspec": {
316
+ "display_name": "Python 3 (ipykernel)",
317
  "language": "python",
318
  "name": "python3"
319
  },
src/output.py CHANGED
@@ -35,7 +35,7 @@ def write_audio(audio_list, sample_path):
35
 
36
  if len(audio_list) > 0:
37
  audio_file = torch.cat(audio_list).reshape(1, -1)
38
- torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE)
39
  logging.info(f'Audio generated at: {sample_path}')
40
  else:
41
  logging.info(f'Audio at: {sample_path} is empty.')
@@ -67,7 +67,7 @@ def assemble_zip(title):
67
 
68
  with zipfile.ZipFile(zip_name, mode="w") as archive:
69
  for file_path in stqdm(config.output_path.iterdir()):
70
- if file_path.suffix == '.wav':
71
  archive.write(file_path, arcname=file_path.name)
72
  file_path.unlink()
73
 
 
35
 
36
  if len(audio_list) > 0:
37
  audio_file = torch.cat(audio_list).reshape(1, -1)
38
+ torchaudio.save(sample_path, audio_file, cf.SAMPLE_RATE, format="mp3")
39
  logging.info(f'Audio generated at: {sample_path}')
40
  else:
41
  logging.info(f'Audio at: {sample_path} is empty.')
 
67
 
68
  with zipfile.ZipFile(zip_name, mode="w") as archive:
69
  for file_path in stqdm(config.output_path.iterdir()):
70
+ if file_path.suffix == '.mp3':
71
  archive.write(file_path, arcname=file_path.name)
72
  file_path.unlink()
73
 
src/predict.py CHANGED
@@ -106,5 +106,5 @@ def predict(text_section, section_index, title, model, speaker):
106
  else:
107
  logging.info(f'Tensor for sentence is not valid: \n {sentence}')
108
 
109
- sample_path = config.output_path / f'{title}_{section_index}.wav'
110
  return audio_list, sample_path
 
106
  else:
107
  logging.info(f'Tensor for sentence is not valid: \n {sentence}')
108
 
109
+ sample_path = config.output_path / f'{title}_{section_index}.mp3'
110
  return audio_list, sample_path