Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -85,7 +85,7 @@ def apply_bass_boost(audio, gain=10):
|
|
85 |
def apply_treble_boost(audio, gain=10):
|
86 |
return audio.high_pass_filter(4000).apply_gain(gain)
|
87 |
|
88 |
-
def apply_noise_gate(audio, threshold=-50.0
|
89 |
samples = np.array(audio.get_array_of_samples())
|
90 |
rms = np.sqrt(np.mean(samples**2))
|
91 |
if rms < 1:
|
@@ -99,8 +99,8 @@ def apply_limiter(audio, limit_dB=-1):
|
|
99 |
limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
|
100 |
return limiter.apply_gain(limit_dB)
|
101 |
|
102 |
-
def apply_phaser(audio
|
103 |
-
return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate *
|
104 |
|
105 |
def apply_bitcrush(audio, bit_depth=8):
|
106 |
samples = np.array(audio.get_array_of_samples()).astype(np.float32)
|
@@ -127,6 +127,23 @@ def apply_stage_mode(audio):
|
|
127 |
processed = apply_bass_boost(processed, gain=6)
|
128 |
return apply_limiter(processed, limit_dB=-2)
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
# === Vocal Isolation Helpers ===
|
131 |
def load_track_local(path, sample_rate, channels=2):
|
132 |
sig, rate = torchaudio.load(path)
|
@@ -390,10 +407,49 @@ def mix_tracks(track1, track2, volume_offset=0):
|
|
390 |
mixed.export(out_path, format="wav")
|
391 |
return out_path
|
392 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
# === Dummy Voice Cloning Tab — Works Locally Only ===
|
394 |
def clone_voice(*args):
|
395 |
return "⚠️ Voice cloning requires local install — use Python 3.9 or below"
|
396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
397 |
# === Speaker Diarization ("Who Spoke When?") ===
|
398 |
try:
|
399 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
@@ -417,7 +473,6 @@ def diarize_and_transcribe(audio_path):
|
|
417 |
audio.export(temp_wav, format="wav")
|
418 |
|
419 |
try:
|
420 |
-
from pyannote.audio import Pipeline as DiarizationPipeline
|
421 |
diarization = diarize_pipeline(temp_wav)
|
422 |
|
423 |
result = whisper.transcribe(temp_wav)
|
@@ -525,6 +580,20 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
525 |
clear_btn=None
|
526 |
)
|
527 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
528 |
# --- Transcribe & Edit Tab ===
|
529 |
with gr.Tab("π Transcribe & Edit"):
|
530 |
gr.Interface(
|
@@ -535,40 +604,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
535 |
description="Convert voice to text and edit it before exporting again."
|
536 |
)
|
537 |
|
538 |
-
# --- Vocal Presets for Singers ===
|
539 |
-
with gr.Tab("🎤 Vocal Presets for Singers"):
|
540 |
-
gr.Interface(
|
541 |
-
fn=process_audio,
|
542 |
-
inputs=[
|
543 |
-
gr.Audio(label="Upload Vocal Track", type="filepath"),
|
544 |
-
gr.CheckboxGroup(choices=[
|
545 |
-
"Noise Reduction",
|
546 |
-
"Normalize",
|
547 |
-
"Compress Dynamic Range",
|
548 |
-
"Bass Boost",
|
549 |
-
"Treble Boost",
|
550 |
-
"Reverb",
|
551 |
-
"Auto Gain",
|
552 |
-
"Vocal Distortion",
|
553 |
-
"Harmony",
|
554 |
-
"Stage Mode"
|
555 |
-
]),
|
556 |
-
gr.Checkbox(label="Isolate Vocals After Effects"),
|
557 |
-
gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0] if preset_names else None),
|
558 |
-
gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
|
559 |
-
],
|
560 |
-
outputs=[
|
561 |
-
gr.Audio(label="Processed Vocal", type="filepath"),
|
562 |
-
gr.Image(label="Waveform Preview"),
|
563 |
-
gr.Textbox(label="Session Log (JSON)", lines=5),
|
564 |
-
gr.Textbox(label="Detected Genre", lines=1),
|
565 |
-
gr.Textbox(label="Status", value="✅ Ready", lines=1)
|
566 |
-
],
|
567 |
-
title="Create Studio-Quality Vocal Tracks",
|
568 |
-
description="Apply singer-friendly presets and effects to enhance vocals.",
|
569 |
-
allow_flagging="never"
|
570 |
-
)
|
571 |
-
|
572 |
# --- Voice Cloning (Local Only) ===
|
573 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
574 |
gr.Interface(
|
@@ -679,7 +714,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
|
|
679 |
)
|
680 |
|
681 |
# --- Mix Two Tracks ===
|
682 |
-
with gr.Tab("
|
683 |
gr.Interface(
|
684 |
fn=mix_tracks,
|
685 |
inputs=[
|
|
|
85 |
def apply_treble_boost(audio, gain=10):
|
86 |
return audio.high_pass_filter(4000).apply_gain(gain)
|
87 |
|
88 |
+
def apply_noise_gate(audio, threshold=-50.0):
|
89 |
samples = np.array(audio.get_array_of_samples())
|
90 |
rms = np.sqrt(np.mean(samples**2))
|
91 |
if rms < 1:
|
|
|
99 |
limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
|
100 |
return limiter.apply_gain(limit_dB)
|
101 |
|
102 |
+
def apply_phaser(audio):
|
103 |
+
return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * 1.1)})
|
104 |
|
105 |
def apply_bitcrush(audio, bit_depth=8):
|
106 |
samples = np.array(audio.get_array_of_samples()).astype(np.float32)
|
|
|
127 |
processed = apply_bass_boost(processed, gain=6)
|
128 |
return apply_limiter(processed, limit_dB=-2)
|
129 |
|
130 |
+
# === Genre Mastering Presets ===
|
131 |
+
genre_presets = {
|
132 |
+
"Soul": ["Warmth", "Bass Boost (+6dB)", "Mid Enhance"],
|
133 |
+
"Funk": ["Treble Boost (+6dB)", "Compression", "Stereo Widening"],
|
134 |
+
"Rock": ["Distortion", "Punchy Mids", "Reverb"],
|
135 |
+
"Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
|
136 |
+
"Acoustic": ["Natural Reverb", "Gentle Compression", "Mid Focus"],
|
137 |
+
"Dance": ["Loudness Maximizer", "Bass Emphasis", "Stereo Widen"],
|
138 |
+
"EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
|
139 |
+
"Country": ["Clean Mix", "Subtle Reverb", "Mid Focus"],
|
140 |
+
"Disco": ["Rhythmic Echo", "Bass Thump", "Treble Boost (+8dB)"],
|
141 |
+
"Metal": ["Distortion", "High Gain", "Crisp Highs"],
|
142 |
+
"Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"],
|
143 |
+
"Trap": ["808 Bass", "Reverb", "Lo-Fi Texture"],
|
144 |
+
"LoFi": ["Bitcrusher", "Tape Hiss", "Soft Compression"]
|
145 |
+
}
|
146 |
+
|
147 |
# === Vocal Isolation Helpers ===
|
148 |
def load_track_local(path, sample_rate, channels=2):
|
149 |
sig, rate = torchaudio.load(path)
|
|
|
407 |
mixed.export(out_path, format="wav")
|
408 |
return out_path
|
409 |
|
410 |
+
# === Genre Mastering Tab ===
|
411 |
+
def apply_genre_preset(audio, genre):
|
412 |
+
global preset_choices
|
413 |
+
selected_preset = preset_choices.get(genre, [])
|
414 |
+
return process_audio(audio, selected_preset, False, genre, "WAV")
|
415 |
+
|
416 |
+
with gr.Tab("🎧 Genre Mastering"):
|
417 |
+
gr.Markdown("Apply pre-tuned mastering settings for different music genres.")
|
418 |
+
|
419 |
+
genre_dropdown = gr.Dropdown(
|
420 |
+
choices=list(genre_presets.keys()),
|
421 |
+
label="Select Genre",
|
422 |
+
value="Pop"
|
423 |
+
)
|
424 |
+
|
425 |
+
gr.Interface(
|
426 |
+
fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
|
427 |
+
inputs=[
|
428 |
+
gr.Audio(label="Upload Track", type="filepath"),
|
429 |
+
genre_dropdown
|
430 |
+
],
|
431 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
432 |
+
title="Genre-Specific Mastering",
|
433 |
+
description="Apply professionally tuned presets for popular music genres."
|
434 |
+
)
|
435 |
+
|
436 |
# === Dummy Voice Cloning Tab — Works Locally Only ===
|
437 |
def clone_voice(*args):
|
438 |
return "⚠️ Voice cloning requires local install — use Python 3.9 or below"
|
439 |
|
440 |
+
with gr.Tab("π Voice Cloning (Local Only)"):
|
441 |
+
gr.Interface(
|
442 |
+
fn=clone_voice,
|
443 |
+
inputs=[
|
444 |
+
gr.File(label="Source Voice Clip"),
|
445 |
+
gr.File(label="Target Voice Clip"),
|
446 |
+
gr.Textbox(label="Text to Clone", lines=5)
|
447 |
+
],
|
448 |
+
outputs=gr.Audio(label="Cloned Output", type="filepath"),
|
449 |
+
title="Replace One Voice With Another",
|
450 |
+
description="Clone voice from source to target speaker using AI"
|
451 |
+
)
|
452 |
+
|
453 |
# === Speaker Diarization ("Who Spoke When?") ===
|
454 |
try:
|
455 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
|
|
473 |
audio.export(temp_wav, format="wav")
|
474 |
|
475 |
try:
|
|
|
476 |
diarization = diarize_pipeline(temp_wav)
|
477 |
|
478 |
result = whisper.transcribe(temp_wav)
|
|
|
580 |
clear_btn=None
|
581 |
)
|
582 |
|
583 |
+
# --- Genre Mastering Tab ===
|
584 |
+
with gr.Tab("🎧 Genre Mastering"):
|
585 |
+
gr.Interface(
|
586 |
+
fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
|
587 |
+
inputs=[
|
588 |
+
gr.Audio(label="Upload Track", type="filepath"),
|
589 |
+
gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
|
590 |
+
],
|
591 |
+
outputs=gr.Audio(label="Mastered Output", type="filepath"),
|
592 |
+
title="Genre-Specific Mastering",
|
593 |
+
description="Apply professionally tuned presets for popular music genres.",
|
594 |
+
allow_flagging="never"
|
595 |
+
)
|
596 |
+
|
597 |
# --- Transcribe & Edit Tab ===
|
598 |
with gr.Tab("π Transcribe & Edit"):
|
599 |
gr.Interface(
|
|
|
604 |
description="Convert voice to text and edit it before exporting again."
|
605 |
)
|
606 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
# --- Voice Cloning (Local Only) ===
|
608 |
with gr.Tab("π Voice Cloning (Local Only)"):
|
609 |
gr.Interface(
|
|
|
714 |
)
|
715 |
|
716 |
# --- Mix Two Tracks ===
|
717 |
+
with gr.Tab(" remix mode"),
|
718 |
gr.Interface(
|
719 |
fn=mix_tracks,
|
720 |
inputs=[
|