Text-to-Speech
F5-TTS
Italian
alien79 committed on
Commit
e4389e7
·
1 Parent(s): 0ce3699

add run.py

Browse files
Files changed (1) hide show
  1. run.py +60 -0
run.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import soundfile as sf
3
+ import csv
4
+ from datasets import load_dataset
5
+
6
# Fetch the "italian" configuration of Multilingual LibriSpeech from the hub.
dataset = load_dataset("facebook/multilingual_librispeech", "italian")

# Root folder that receives every exported split; created up front so later
# steps can assume it exists.
output_dir = "multilingual_librispeech_italian"
os.makedirs(output_dir, exist_ok=True)
12
+
13
def save_split(split_name, dry_run=False):
    """Export one split of the module-level ``dataset`` to disk.

    Each example's audio is written to
    ``<output_dir>/<split_name>/wavs/<i>.wav``, where ``i`` is the example's
    position in the split. A pipe-delimited ``metadata.csv`` with one
    ``audio_path|transcript`` row per example (no header row) is written to
    ``<output_dir>/<split_name>/``.

    Args:
        split_name: Key of the split inside ``dataset`` (e.g. ``"9_hours"``).
        dry_run: When true, print the split summary and the first example
            after column pruning, then return without writing any audio or
            metadata. The output folders are still created.
    """
    split = dataset[split_name]
    split_dir = os.path.join(output_dir, split_name)
    wavs_dir = os.path.join(split_dir, "wavs")
    # Creating the nested folder also creates split_dir.
    os.makedirs(wavs_dir, exist_ok=True)

    # "sampling_rate" is kept in the allow-list as in the original script; the
    # export loop below reads the rate from the "audio" dict, so the entry is
    # harmless if no such column exists.
    columns_to_keep = {"transcript", "audio", "sampling_rate"}

    # remove_columns is documented to take a str or a list of str, so build an
    # ordered list (in the split's own column order) rather than passing a set.
    columns_to_remove = [
        name for name in split.column_names if name not in columns_to_keep
    ]
    pruned = split.remove_columns(columns_to_remove)

    if dry_run:
        print(split)
        print(pruned[0])
        return

    metadata_path = os.path.join(split_dir, "metadata.csv")

    # newline="" lets the csv module control line endings itself.
    with open(metadata_path, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file, delimiter="|")

        for i, example in enumerate(pruned):
            audio = example["audio"]
            audio_path = os.path.join(wavs_dir, f"{i}.wav")

            # Write the decoded samples as a WAV file at their own rate.
            sf.write(audio_path, audio["array"], audio["sampling_rate"])

            # One metadata row per clip: path to the WAV, then its transcript.
            writer.writerow([audio_path, example["transcript"]])
58
+
59
# To preview a split without writing files, call instead:
#     save_split("1_hours", dry_run=True)
save_split("9_hours")