Spaces:
Sleeping
Sleeping
import gradio as gr, glob, os, auditok, zipfile, wave, pytube.exceptions, librosa | |
from pytube import YouTube | |
from moviepy.editor import VideoFileClip | |
def download_video(url):
    """Download a YouTube video and write its audio track to ``output.wav``.

    The video file itself is only an intermediate artefact and is deleted
    again once the audio has been extracted (or extraction has failed).

    Args:
        url: Address of the YouTube video, as entered in the URL textbox.

    Returns:
        A status message telling the user to continue with the next tab.

    Raises:
        gr.Error: If the URL is empty or rejected by pytube, if pytube offers
            no stream to download, or if the video has no audio track.
    """
    invalid_url_message = "URL not valid or is empty! Please fix the link or enter one!"
    if not url or not url.strip():
        raise gr.Error(invalid_url_message)
    try:
        yt = YouTube(url.strip())
    except pytube.exceptions.RegexMatchError as err:
        raise gr.Error(invalid_url_message) from err

    video = yt.streams.get_highest_resolution()
    if video is None:
        raise gr.Error("No downloadable stream was found for this video.")

    # download() returns the path it wrote to; remember it so that only this
    # file is removed afterwards instead of every *.mp4 in the directory.
    video_path = video.download()
    try:
        video_clip = VideoFileClip(video_path)
        try:
            audio_clip = video_clip.audio
            if audio_clip is None:
                raise gr.Error("The downloaded video does not contain an audio track.")
            try:
                audio_clip.write_audiofile("output.wav")
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
    finally:
        # Runs on failure as well, so a broken download never lingers on disk.
        if os.path.exists(video_path):
            os.remove(video_path)
    return "Finished downloading! Please proceed to next tab."
def split_audio_from_yt_video(mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur):
    """Split ``output.wav`` into segments and pack them into a zip archive.

    Segments are written as ``<name>-<n>.wav``, added to ``audio_files.zip``
    and then deleted again. ``output.wav`` is removed once at least one
    segment has been produced.

    Args:
        mindur: Minimum segment duration passed to ``auditok.split``.
        maxdur: Maximum segment duration passed to ``auditok.split``.
        name_for_split_files: Prefix used for the generated file names.
        show_amount_of_files_and_file_dur: When true, the status message also
            reports the number of segments and their total length in minutes.

    Returns:
        A ``(status message, zip file name)`` tuple.

    Raises:
        gr.Error: If ``output.wav`` is missing, the durations or the name are
            invalid, no segment was detected, or a segment cannot be read
            back while computing the total length.
    """
    if show_amount_of_files_and_file_dur:
        gr.Warning("show_amount_of_files_and_file_dur set to True. This will take longer if your audio file is long.")

    audio_path = "output.wav"
    if not os.path.exists(audio_path):
        raise gr.Error("Output.wav does not exist! Did you do the first tab correctly or at all?")
    # NOTE(review): a cleared number field presumably arrives as None; guard
    # so the comparisons below cannot fail with a TypeError.
    if mindur is None or maxdur is None:
        raise gr.Error("Min and max duration cannot be empty!")
    if mindur == maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, min and max are the same number.")
    if mindur > maxdur:
        raise gr.Error(f"Cannot split mindur={mindur} and maxdur={maxdur}, mindur is higher than maxdur.")

    # An untouched textbox yields "" rather than None, so test for emptiness.
    # basename() keeps the generated files inside the working directory.
    prefix = os.path.basename((name_for_split_files or "").strip())
    if not prefix:
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")

    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur,
        max_dur=maxdur,
        max_silence=0.3,
        energy_threshold=45
    )

    # Track exactly the files written by this call; globbing "*.wav" would
    # also pick up leftovers from earlier runs.
    segment_files = []
    for index, region in enumerate(audio_regions, start=1):
        segment_name = f"{prefix}-{index}.wav"
        region.save(segment_name)
        segment_files.append(segment_name)

    if not segment_files:
        # The source is kept so the user can retry with other durations.
        raise gr.Error("No audio segments were detected. Try different min/max durations.")

    # Remove the source only after every region has been written out.
    os.remove(audio_path)

    zip_file_name = "audio_files.zip"
    try:
        with zipfile.ZipFile(zip_file_name, "w") as zip_file:
            for segment_name in segment_files:
                zip_file.write(segment_name, os.path.basename(segment_name))

        if not show_amount_of_files_and_file_dur:
            return "Files split successfully!\nCheck below for zipped files.", zip_file_name

        total_length = 0.0
        for segment_name in segment_files:
            try:
                with wave.open(segment_name, "rb") as audio_file:
                    total_length += audio_file.getnframes() / float(audio_file.getframerate())
            except wave.Error as e:
                raise gr.Error(f"Error reading file: {e}")
        length_mins = total_length / 60
        total_files = len(segment_files)
        return f"Files split successfully!\nCheck below for zipped files.\n\n{total_files} files created, {length_mins:.2f} minutes total.", zip_file_name
    finally:
        # The segments live on inside the zip; drop the loose copies on every
        # exit path so they cannot leak into a later run.
        for segment_name in segment_files:
            if os.path.exists(segment_name):
                os.remove(segment_name)
def analyze_audio(zip_file_path):
    """Report the average sample rate of the WAV files inside a zip archive.

    The archive members are read in place instead of being extracted, so no
    files from a previous upload can influence the result.

    Args:
        zip_file_path: Path (or binary file object) of the uploaded archive.

    Returns:
        ``"Average sample rate: <value>"`` when at least one WAV member could
        be read, otherwise ``"No average sample rate could be found."``.

    Raises:
        gr.Error: If nothing was uploaded or the upload is not a zip archive.
    """
    if zip_file_path is None:
        raise gr.Error("ZIP file cannot be empty!")
    try:
        archive = zipfile.ZipFile(zip_file_path, 'r')
    except zipfile.BadZipFile as err:
        raise gr.Error("The uploaded file is not a valid ZIP archive.") from err

    total_sample_rate = 0
    total_files = 0
    with archive:
        for member in archive.infolist():
            if member.is_dir() or not member.filename.lower().endswith('.wav'):
                continue
            try:
                with archive.open(member) as raw, wave.open(raw, 'rb') as audio_file:
                    total_sample_rate += audio_file.getframerate()
                    total_files += 1
            except (wave.Error, EOFError) as e:
                # Unreadable members are reported and skipped, not fatal;
                # EOFError covers truncated or empty files.
                print(f"Error reading file: {e}")

    if total_files > 0:
        average_sample_rate = total_sample_rate / total_files
        return f"Average sample rate: {average_sample_rate}"
    return "No average sample rate could be found."
def split_wav_file(audiofileuploader, mindur2, maxdur2, name_for_split_files2):
    """Split an uploaded WAV file into segments and pack them into a zip.

    Segments are written as ``<name>-<n>.wav``, added to ``audio_files.zip``
    and then deleted again. The uploaded file is removed once at least one
    segment has been produced.

    Args:
        audiofileuploader: The uploaded audio file, handed to
            ``auditok.split`` and ``os.remove`` as a path.
        mindur2: Minimum segment duration passed to ``auditok.split``.
        maxdur2: Maximum segment duration passed to ``auditok.split``.
        name_for_split_files2: Prefix used for the generated file names.

    Returns:
        A ``(status message, zip file name)`` tuple; the message includes the
        number of segments created.

    Raises:
        gr.Error: If no file was uploaded, the durations or the name are
            invalid, or no segment was detected.
    """
    if audiofileuploader is None:
        raise gr.Error("Audio file cannot be empty!")
    # NOTE(review): a cleared number field presumably arrives as None; guard
    # so the comparisons below cannot fail with a TypeError.
    if mindur2 is None or maxdur2 is None:
        raise gr.Error("Min and max duration cannot be empty!")
    # The messages must interpolate this function's own arguments; the bare
    # names mindur/maxdur refer to the UI components of another tab.
    if mindur2 == maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, min and max are the same number.")
    if mindur2 > maxdur2:
        raise gr.Error(f"Cannot split mindur={mindur2} and maxdur={maxdur2}, mindur is higher than maxdur.")

    # An untouched textbox yields "" rather than None, so test for emptiness.
    # basename() keeps the generated files inside the working directory.
    prefix = os.path.basename((name_for_split_files2 or "").strip())
    if not prefix:
        raise gr.Error("Split files name cannot be empty! This will be replaced with an alternative naming style in the future.")

    audio_path = audiofileuploader
    audio_regions = auditok.split(
        audio_path,
        min_dur=mindur2,
        max_dur=maxdur2,
        max_silence=0.3,
        energy_threshold=45
    )

    # Track exactly the files written by this call; globbing "*.wav" would
    # also pick up leftovers from earlier runs.
    segment_files = []
    for index, region in enumerate(audio_regions, start=1):
        segment_name = f"{prefix}-{index}.wav"
        region.save(segment_name)
        segment_files.append(segment_name)

    if not segment_files:
        # The upload is kept so the user can retry with other durations.
        raise gr.Error("No audio segments were detected. Try different min/max durations.")

    # Remove the upload only after every region has been written out.
    os.remove(audio_path)

    zip_file_name2 = "audio_files.zip"
    try:
        with zipfile.ZipFile(zip_file_name2, "w") as zip_file:
            for segment_name in segment_files:
                zip_file.write(segment_name, os.path.basename(segment_name))
    finally:
        # The segments live on inside the zip; drop the loose copies.
        for segment_name in segment_files:
            if os.path.exists(segment_name):
                os.remove(segment_name)
    return f"File split successfully!\nCheck below for zipped files.\nAmount created: {len(segment_files)}", zip_file_name2
def get_average_pitch(audio_file):
    """Estimate the average pitch of an audio file with ``librosa.piptrack``.

    Args:
        audio_file: The uploaded audio file, handed to ``librosa.load``; it
            is loaded at its native sample rate (``sr=None``).

    Returns:
        ``"Average pitch: <value> Hz"``, or a message saying that no pitch
        could be found when the tracker reports no pitch candidates.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if audio_file is None:
        raise gr.Error("Audio file cannot be empty!")
    y, sr = librosa.load(audio_file, sr=None)
    pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
    # piptrack leaves 0 in every bin that is not a spectral peak; averaging
    # the full matrix would drag the result toward 0 Hz, so only the bins
    # that actually hold a pitch estimate are considered.
    detected = pitches[pitches > 0]
    if detected.size == 0:
        return "No average pitch could be found."
    mean_pitch = detected.mean()
    return f"Average pitch: {mean_pitch:.2f} Hz"
# Gradio UI: three top-level tabs (download, split, misc tools), each wiring
# one button to one of the handler functions defined above.
# NOTE(review): the Row/Column nesting below was reconstructed from a copy
# that had lost its indentation; confirm it against the intended layout.
with gr.Blocks(theme='NoCrypt/miku', title="Global Dataset Maker") as app:
    gr.HTML(
        "<h1> Welcome to the GDMGS! (GlobalDatasetMaker Gradio Space) </h1>"
    )
    gr.Markdown(
        "This Space will create a dataset for you, all automatically. **Please be warned that due to not having a GPU on this Space, some steps might take longer to complete.**"
    )
    with gr.Tabs():
        # Step 1: fetch the video and extract its audio.
        with gr.TabItem("Download Video"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        url = gr.Textbox(label="URL")
                        convertion = gr.Button("Download", variant='primary')
                        convertion.click(
                            fn=download_video,
                            inputs=[url],
                            outputs=gr.Text(label="Output")
                        )
        # Step 2: split the extracted audio into zipped segments.
        with gr.TabItem("Split audio files"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        mindur = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                        maxdur = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                        name_for_split_files = gr.Textbox(label="Name for split files")
                        show_amount_of_files_and_file_dur = gr.Checkbox(False, label="Show total amount of files and duration?")
                        splitbtn = gr.Button("Split", variant='primary')
                        splitbtn.click(
                            split_audio_from_yt_video,
                            inputs=[mindur, maxdur, name_for_split_files, show_amount_of_files_and_file_dur],
                            outputs=[gr.Text(label="Output"), gr.File(label="Zipped files")]
                        )
        # Stand-alone helpers that work on uploaded files.
        with gr.TabItem("Misc tools"):
            with gr.Tab("SR analyzer"):
                gr.Markdown("Upload a zip file of your wavs here and this will determine the average sample rate.")
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            zipuploader = gr.File(file_count='single', file_types=[".zip"], label="ZIP file")
                            uploadbtn = gr.Button("Analyze", variant='primary')
                            uploadbtn.click(
                                analyze_audio,
                                [zipuploader],
                                [gr.Text(label="Result")]
                            )
            with gr.Tab("File splitter"):
                gr.Markdown("If you would rather split a single WAV (mp3 support soon) audio file, use this method instead.")
                gr.HTML(
                    "<h1> Most bugs should be fixed now, if not, let me know. </h1>"
                )
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            audiofileuploader = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
                            mindur2 = gr.Number(label="Min duration", minimum=1, maximum=10, value=1)
                            maxdur2 = gr.Number(label="Max duration", minimum=1, maximum=10, value=5)
                            name_for_split_files2 = gr.Textbox(label="Name for split files")
                            audiofileuploadbtn = gr.Button("Split", variant='primary')
                            audiofileuploadbtn.click(
                                split_wav_file,
                                [audiofileuploader, mindur2, maxdur2, name_for_split_files2],
                                [gr.Text(label="Output"), gr.File(label="Zipped files")]
                            )
            with gr.Tab("Pitch analyzer"):
                gr.Markdown("Upload a wav file here, and this will determine the average pitch.")
                # The heading tag was left unclosed; close it so the markup is well-formed.
                gr.HTML("<h1> Zip files are not supported as of now. </h1>")
                with gr.Row():
                    with gr.Column(variant='compact'):
                        with gr.Row():
                            upload = gr.File(file_count='single', file_types=[".wav"], label="WAV file")
                            analyze = gr.Button("Analyze", variant='primary')
                            analyze.click(
                                get_average_pitch,
                                [upload],
                                [gr.Text(label="Result")]
                            )
app.launch()