Update app.py
app.py (CHANGED)
@@ -1,5 +1,6 @@
 import gradio as gr
 from pydub import AudioSegment
+from pydub.silence import detect_nonsilent
 import numpy as np
 import tempfile
 import os
@@ -102,9 +103,9 @@ def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
     return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
 
 def apply_bitcrush(audio, bit_depth=8):
-    samples = np.array(audio.get_array_of_samples())
+    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
     max_val = np.iinfo(np.int16).max
-    crushed = (samples
+    crushed = ((samples / max_val) * (2 ** bit_depth)).astype(np.int16)
     return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
 
 def apply_auto_gain(audio, target_dB=-20):
@@ -116,6 +117,16 @@ def apply_vocal_distortion(audio, intensity=0.3):
     distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
     return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
 
+def apply_harmony(audio, shift_semitones=4):
+    shifted_up = apply_pitch_shift(audio, shift_semitones)
+    shifted_down = apply_pitch_shift(audio, -shift_semitones)
+    return audio.overlay(shifted_up).overlay(shifted_down)
+
+def apply_stage_mode(audio):
+    processed = apply_reverb(audio)
+    processed = apply_bass_boost(processed, gain=6)
+    return apply_limiter(processed, limit_dB=-2)
+
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
@@ -188,7 +199,15 @@ if not preset_choices:
     "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
     "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
     "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
-    "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
+    "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
+
+    # 🎤 Vocalist Presets
+    "π Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
+    "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
+    "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
+    "💫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
+    "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
+    "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"]
 }
 
 preset_names = list(preset_choices.keys())
@@ -252,7 +271,9 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
         "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
         "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
         "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
-        "Vocal Distortion": lambda x: apply_vocal_distortion(x)
+        "Vocal Distortion": lambda x: apply_vocal_distortion(x),
+        "Harmony": lambda x: apply_harmony(x),
+        "Stage Mode": apply_stage_mode
     }
 
     effects_to_apply = preset_choices.get(preset_name, selected_effects)
@@ -317,7 +338,7 @@ def transcribe_audio(audio_path):
     text = " ".join([seg.text for seg in segments])
     return text
 
-# === TTS
+# === TTS Voice Generator ===
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
 
 def generate_tts(text):
@@ -355,7 +376,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
     if not nonsilent_ranges:
         return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
 
-    trimmed = audio[nonsilent_ranges[0][0]:
+    trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
     out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
     trimmed.export(out_path, format="wav")
     return out_path
@@ -369,7 +390,7 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
-# === Dummy Voice Cloning Tab – Works
+# === Dummy Voice Cloning Tab – Works Locally Only ===
 def clone_voice(*args):
     return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 
@@ -432,7 +453,9 @@ effect_options = [
     "Flanger",
     "Bitcrusher",
     "Auto Gain",
-    "Vocal Distortion"
+    "Vocal Distortion",
+    "Harmony",
+    "Stage Mode"
 ]
 
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
@@ -512,6 +535,47 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Convert voice to text and edit it before exporting again."
         )
 
+    # --- Vocal Presets for Singers ===
+    with gr.Tab("🎤 Vocal Presets for Singers"):
+        gr.Interface(
+            fn=process_audio,
+            inputs=[
+                gr.Audio(label="Upload Vocal Track", type="filepath"),
+                gr.CheckboxGroup(choices=[
+                    "Noise Reduction",
+                    "Normalize",
+                    "Compress Dynamic Range",
+                    "Bass Boost",
+                    "Treble Boost",
+                    "Reverb",
+                    "Auto Gain",
+                    "Vocal Distortion",
+                    "Harmony",
+                    "Stage Mode"
+                ]),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=[
+                    "π Clean Vocal",
+                    "🧪 Vocal Distortion",
+                    "🎶 Singer's Harmony",
+                    "💫 ASMR Vocal",
+                    "🎼 Stage Mode",
+                    "🎵 Auto-Tune Style"
+                ], label="Select Vocal Preset", value="Default"),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.Audio(label="Processed Vocal", type="filepath"),
+                gr.Image(label="Waveform Preview"),
+                gr.Textbox(label="Session Log (JSON)", lines=5),
+                gr.Textbox(label="Detected Genre", lines=1),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Create Studio-Quality Vocal Tracks",
+            description="Apply singer-friendly presets and effects to enhance vocals.",
+            allow_flagging="never"
+        )
+
     # --- Voice Cloning (Local Only) ===
     with gr.Tab("π Voice Cloning (Local Only)"):
         gr.Interface(
@@ -563,7 +627,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         return None, None, None, None
 
     with gr.Tab("🧾 Auto-Save & Resume"):
-        gr.Markdown("Save your current state and resume
+        gr.Markdown("Save your current state and resume later.")
 
         action_radio = gr.Radio(["save", "load"], label="Action", value="save")
         audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
@@ -622,7 +686,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Mix Two Tracks ===
-    with gr.Tab("
+    with gr.Tab(" remix mode"):
         gr.Interface(
             fn=mix_tracks,
             inputs=[
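Note on the new apply_harmony helper: it calls apply_pitch_shift, which is defined elsewhere in app.py and does not appear in these hunks. The sketch below is only an illustration of what such a helper commonly looks like with pydub, reusing the frame-rate override trick already visible at old line 102; the name and signature come from the call sites, the body is an assumption.

from pydub import AudioSegment

def apply_pitch_shift(audio: AudioSegment, semitones: float) -> AudioSegment:
    # Hypothetical helper: resample by the equal-temperament ratio
    # 2 ** (semitones / 12). Like the frame-rate override used elsewhere in
    # app.py, this shifts pitch and tempo together (no time-stretching).
    ratio = 2 ** (semitones / 12.0)
    shifted = audio._spawn(
        audio.raw_data,
        overrides={"frame_rate": int(audio.frame_rate * ratio)},
    )
    # Reset the frame rate so players read the samples at normal speed.
    return shifted.set_frame_rate(audio.frame_rate)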
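For context on the detect_silence fix: the completed slice keeps everything from the start of the first non-silent range to the end of the last one. A minimal standalone sketch of that pattern, using the same thresholds as the hunk (file names here are placeholders):

from pydub import AudioSegment
from pydub.silence import detect_nonsilent

audio = AudioSegment.from_file("vocals.wav")  # placeholder input file
# Millisecond ranges where the level stays above -50 dBFS for at least 1 s.
ranges = detect_nonsilent(audio, min_silence_len=1000, silence_thresh=-50.0)
if ranges:
    start, end = ranges[0][0], ranges[-1][1]
    audio[start:end].export("trimmed.wav", format="wav")  # placeholder output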