Update app.py
app.py
CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 from pydub import AudioSegment
-from pydub.silence import detect_nonsilent
 import numpy as np
 import tempfile
 import os
@@ -85,6 +84,38 @@ def apply_bass_boost(audio, gain=10):
 def apply_treble_boost(audio, gain=10):
     return audio.high_pass_filter(4000).apply_gain(gain)
 
+def apply_noise_gate(audio, threshold=-50.0, attack=50, release=100):
+    samples = np.array(audio.get_array_of_samples())
+    rms = np.sqrt(np.mean(samples**2))
+    if rms < 1:
+        return audio
+    normalized = samples / np.max(np.abs(samples))
+    envelope = np.abs(normalized)
+    gated = np.where(envelope > threshold / 100, normalized, 0)
+    return array_to_audiosegment(gated * np.iinfo(np.int16).max, audio.frame_rate, channels=audio.channels)
+
+def apply_limiter(audio, limit_dB=-1):
+    limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
+    return limiter.apply_gain(limit_dB)
+
+def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
+    return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
+
+def apply_bitcrush(audio, bit_depth=8):
+    samples = np.array(audio.get_array_of_samples())
+    max_val = np.iinfo(np.int16).max
+    crushed = (samples // (max_val // (2 ** bit_depth))).astype(np.int16)
+    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
+
+def apply_auto_gain(audio, target_dB=-20):
+    change = target_dB - audio.dBFS
+    return audio.apply_gain(change)
+
+def apply_vocal_distortion(audio, intensity=0.3):
+    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
+    distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
+    return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
+
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
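Note: the helpers added above rely on pydub's AudioSegment API (get_array_of_samples, dBFS, apply_gain) plus the Space's own array_to_audiosegment converter defined elsewhere in app.py. For reference, a minimal standalone sketch of the same auto-gain idea, assuming only pydub and ffmpeg are installed; "voice.wav" is a placeholder path:

from pydub import AudioSegment

def auto_gain(seg: AudioSegment, target_dB: float = -20.0) -> AudioSegment:
    # seg.dBFS is pydub's average loudness; apply_gain shifts it toward the target
    return seg.apply_gain(target_dB - seg.dBFS)

seg = AudioSegment.from_file("voice.wav")        # placeholder input
leveled = auto_gain(seg)
print(f"{seg.dBFS:.1f} dBFS -> {leveled.dBFS:.1f} dBFS")
leveled.export("voice_leveled.wav", format="wav")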
@@ -152,7 +183,12 @@ if not preset_choices:
     preset_choices = {
         "Default": [],
         "Clean Podcast": ["Noise Reduction", "Normalize"],
-        "
+        "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
+        "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
+        "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
+        "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
+        "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
+        "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
     }
 
     preset_names = list(preset_choices.keys())
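Note: these preset names are resolved later in process_audio via dict.get, so an unrecognized preset falls back to the manually selected effects, while "Default" (an empty list) applies none. A small illustration with placeholder values:

preset_choices = {
    "Default": [],
    "Clean Podcast": ["Noise Reduction", "Normalize"],
}

def resolve_effects(preset_name, selected_effects):
    # dict.get: a known preset wins, an unknown preset falls back to the manual selection
    return preset_choices.get(preset_name, selected_effects)

print(resolve_effects("Clean Podcast", ["Echo"]))   # ['Noise Reduction', 'Normalize']
print(resolve_effects("No Such Preset", ["Echo"]))  # ['Echo']
print(resolve_effects("Default", ["Echo"]))         # []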
@@ -210,6 +246,13 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
         "Bass Boost": apply_bass_boost,
         "Treble Boost": apply_treble_boost,
         "Normalize": apply_normalize,
+        "Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
+        "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
+        "Phaser": lambda x: apply_phaser(x),
+        "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
+        "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
+        "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
+        "Vocal Distortion": lambda x: apply_vocal_distortion(x)
     }
 
     effects_to_apply = preset_choices.get(preset_name, selected_effects)
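Note: each new entry follows the existing pattern of mapping a display name to a one-argument callable over an AudioSegment. The loop that walks effects_to_apply is not part of this diff, so the following is only a hedged sketch of how such a map can drive an in-order effect chain; "input.wav" and "processed.wav" are placeholder paths:

from pydub import AudioSegment

# Hypothetical subset of the effect map: name -> callable(AudioSegment) -> AudioSegment
effect_map = {
    "Auto Gain": lambda seg: seg.apply_gain(-20 - seg.dBFS),
    "Limiter": lambda seg: seg.apply_gain(-1),
}

def apply_chain(seg: AudioSegment, effect_names):
    for name in effect_names:
        fn = effect_map.get(name)
        if fn is not None:  # unknown effect names are skipped rather than raising
            seg = fn(seg)
    return seg

processed = apply_chain(AudioSegment.from_file("input.wav"), ["Auto Gain", "Limiter"])
processed.export("processed.wav", format="wav")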
@@ -312,7 +355,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
     if not nonsilent_ranges:
         return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
 
-    trimmed = audio[nonsilent_ranges[0][0]:
+    trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
     out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
     trimmed.export(out_path, format="wav")
     return out_path
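Note: the completed slice assumes nonsilent_ranges is a list of [start_ms, end_ms] pairs such as pydub.silence.detect_nonsilent returns (the import removed at the top of this diff); how the updated file computes it is not visible here. A standalone sketch of trimming leading and trailing silence that way, with placeholder path and thresholds:

from pydub import AudioSegment
from pydub.silence import detect_nonsilent

audio = AudioSegment.from_file("speech.wav")   # placeholder input
nonsilent_ranges = detect_nonsilent(audio, min_silence_len=1000, silence_thresh=-50)

if nonsilent_ranges:
    # keep everything from the first nonsilent start to the last nonsilent end (ms)
    trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
    trimmed.export("speech_trimmed.wav", format="wav")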
@@ -326,7 +369,7 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
-# === Dummy Voice Cloning Tab – Works on
+# === Dummy Voice Cloning Tab – Works on Local Only ===
 def clone_voice(*args):
     return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 
@@ -382,7 +425,14 @@ effect_options = [
     "Stereo Widening",
     "Bass Boost",
     "Treble Boost",
-    "Normalize"
+    "Normalize",
+    "Noise Gate",
+    "Limiter",
+    "Phaser",
+    "Flanger",
+    "Bitcrusher",
+    "Auto Gain",
+    "Vocal Distortion"
 ]
 
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
@@ -421,7 +471,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
                 gr.File(label="Upload Multiple Files", file_count="multiple"),
                 gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                 gr.Checkbox(label="Isolate Vocals After Effects"),
-                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
                 gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
             ],
             outputs=[
@@ -462,7 +512,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Convert voice to text and edit it before exporting again."
         )
 
-    # --- Voice Cloning (
+    # --- Voice Cloning (Local Only) ===
     with gr.Tab("🎭 Voice Cloning (Local Only)"):
         gr.Interface(
             fn=clone_voice,
@@ -472,7 +522,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
                 gr.Textbox(label="Text to Clone", lines=5)
             ],
             outputs=gr.Audio(label="Cloned Output", type="filepath"),
-            title="Replace One Voice With Another
+            title="Replace One Voice With Another",
             description="Clone voice from source to target speaker using AI"
         )
 