tee342 committed on
Commit
651e9be
·
verified ·
1 Parent(s): 6085d7e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -8
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  from pydub import AudioSegment
3
- from pydub.silence import detect_nonsilent
4
  import numpy as np
5
  import tempfile
6
  import os
@@ -85,6 +84,38 @@ def apply_bass_boost(audio, gain=10):
85
  def apply_treble_boost(audio, gain=10):
86
  return audio.high_pass_filter(4000).apply_gain(gain)
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # === Vocal Isolation Helpers ===
89
  def load_track_local(path, sample_rate, channels=2):
90
  sig, rate = torchaudio.load(path)
@@ -152,7 +183,12 @@ if not preset_choices:
152
  preset_choices = {
153
  "Default": [],
154
  "Clean Podcast": ["Noise Reduction", "Normalize"],
155
- "Music Remix": ["Bass Boost", "Stereo Widening"]
 
 
 
 
 
156
  }
157
 
158
  preset_names = list(preset_choices.keys())
@@ -210,6 +246,13 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
210
  "Bass Boost": apply_bass_boost,
211
  "Treble Boost": apply_treble_boost,
212
  "Normalize": apply_normalize,
 
 
 
 
 
 
 
213
  }
214
 
215
  effects_to_apply = preset_choices.get(preset_name, selected_effects)
@@ -312,7 +355,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
312
  if not nonsilent_ranges:
313
  return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
314
 
315
- trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
316
  out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
317
  trimmed.export(out_path, format="wav")
318
  return out_path
@@ -326,7 +369,7 @@ def mix_tracks(track1, track2, volume_offset=0):
326
  mixed.export(out_path, format="wav")
327
  return out_path
328
 
329
- # === Dummy Voice Cloning Tab – Works on Hugging Face ===
330
  def clone_voice(*args):
331
  return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
332
 
@@ -382,7 +425,14 @@ effect_options = [
382
  "Stereo Widening",
383
  "Bass Boost",
384
  "Treble Boost",
385
- "Normalize"
 
 
 
 
 
 
 
386
  ]
387
 
388
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
@@ -421,7 +471,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
421
  gr.File(label="Upload Multiple Files", file_count="multiple"),
422
  gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
423
  gr.Checkbox(label="Isolate Vocals After Effects"),
424
- gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
425
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
426
  ],
427
  outputs=[
@@ -462,7 +512,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
462
  description="Convert voice to text and edit it before exporting again."
463
  )
464
 
465
- # --- Voice Cloning (Dubbing) – Dummy for Hugging Face ===
466
  with gr.Tab("🎭 Voice Cloning (Local Only)"):
467
  gr.Interface(
468
  fn=clone_voice,
@@ -472,7 +522,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
472
  gr.Textbox(label="Text to Clone", lines=5)
473
  ],
474
  outputs=gr.Audio(label="Cloned Output", type="filepath"),
475
- title="Replace One Voice With Another (Local Only)",
476
  description="Clone voice from source to target speaker using AI"
477
  )
478
 
 
1
  import gradio as gr
2
  from pydub import AudioSegment
 
3
  import numpy as np
4
  import tempfile
5
  import os
 
84
def apply_treble_boost(audio, gain=10):
    """Emphasise high frequencies of ``audio``.

    Runs the segment through ``high_pass_filter(4000)`` and then applies
    ``gain`` via ``apply_gain`` to the filtered result, which is returned.
    """
    # Only the filtered signal is returned, so content removed by the
    # high-pass filter is dropped rather than mixed back in.
    filtered = audio.high_pass_filter(4000)
    return filtered.apply_gain(gain)
86
 
87
def apply_noise_gate(audio, threshold=-50.0, attack=50, release=100):
    """Zero out samples that fall below ``threshold`` dB relative to the peak.

    The samples are peak-normalised, every sample whose magnitude is at or
    below the linear equivalent of ``threshold`` (``10 ** (threshold / 20)``)
    is replaced by silence, and the result is rescaled to the int16 range
    before being rebuilt with ``array_to_audiosegment``.

    Args:
        audio: Segment providing ``get_array_of_samples()``, ``frame_rate``
            and ``channels``.
        threshold: Gate threshold in dB relative to the loudest sample.
        attack: Accepted for interface compatibility; not used.
        release: Accepted for interface compatibility; not used.

    Returns:
        ``audio`` itself when it is empty or effectively silent (RMS < 1),
        otherwise the gated segment produced by ``array_to_audiosegment``.
    """
    # Work in float64: squaring integer samples overflows their dtype and
    # corrupts the RMS estimate.
    samples = np.array(audio.get_array_of_samples()).astype(np.float64)
    if samples.size == 0:
        # np.max() raises on an empty array, so there is nothing to gate.
        return audio

    rms = np.sqrt(np.mean(samples ** 2))
    if rms < 1:
        return audio

    # rms >= 1 guarantees a non-zero peak, so this division is safe.
    normalized = samples / np.max(np.abs(samples))
    envelope = np.abs(normalized)

    # Convert the dB threshold to a linear amplitude. Dividing by 100 gave a
    # negative number that a non-negative envelope always exceeds, so the
    # gate never closed.
    linear_threshold = 10.0 ** (threshold / 20.0)
    gated = np.where(envelope > linear_threshold, normalized, 0.0)

    # NOTE(review): array_to_audiosegment is defined elsewhere in this file;
    # it is assumed to accept values in the int16 range - confirm.
    return array_to_audiosegment(
        gated * np.iinfo(np.int16).max,
        audio.frame_rate,
        channels=audio.channels,
    )
96
+
97
def apply_limiter(audio, limit_dB=-1):
    """Keep the peak level of ``audio`` at or below ``limit_dB`` dBFS.

    The previous implementation applied ``limit_dB`` as a fixed gain, which
    made every input quieter by that amount (or louder for a positive value)
    without enforcing any ceiling.

    Args:
        audio: Segment providing ``max_dBFS`` and ``apply_gain``.
        limit_dB: Peak ceiling in dBFS.

    Returns:
        ``audio`` unchanged when its peak is already at or below the
        ceiling (this includes silence, whose peak is ``-inf``); otherwise
        the segment attenuated by exactly the overshoot.
    """
    peak = audio.max_dBFS
    if peak <= limit_dB:
        return audio
    # Attenuate by the overshoot so the loudest sample lands on the ceiling.
    return audio.apply_gain(limit_dB - peak)
100
+
101
def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
    """Return ``audio`` re-spawned with its frame rate scaled by ``rate``.

    The raw sample data is passed through untouched; only the
    ``frame_rate`` override changes, set to ``int(audio.frame_rate * rate)``.

    NOTE(review): ``depth``, ``feedback`` and ``mix`` are accepted but never
    read, and no phase modulation is performed here - confirm whether a real
    phaser implementation is still planned.
    """
    scaled_rate = int(audio.frame_rate * rate)
    overrides = {"frame_rate": scaled_rate}
    return audio._spawn(audio.raw_data, overrides=overrides)
103
+
104
def apply_bitcrush(audio, bit_depth=8):
    """Quantise samples to ``bit_depth`` bits while keeping their level.

    Each sample is floored to the nearest multiple of the quantisation step
    ``2 ** (16 - bit_depth)``. The previous version divided by the step but
    never multiplied back, so the output collapsed to a near-silent range,
    and a ``bit_depth`` of 15 or more produced a zero step (division by
    zero).

    Args:
        audio: Segment providing ``get_array_of_samples()``, ``frame_rate``
            and ``channels``.
        bit_depth: Target resolution in bits; clamped to the range 1..16.

    Returns:
        The segment rebuilt by ``array_to_audiosegment`` from the quantised
        int16 samples.
    """
    # NOTE(review): like the original, this assumes 16-bit samples - confirm
    # against the callers / array_to_audiosegment.
    source_bits = 16
    effective_bits = min(max(int(bit_depth), 1), source_bits)
    step = 2 ** (source_bits - effective_bits)

    # Widen before the arithmetic so intermediate values cannot wrap.
    samples = np.array(audio.get_array_of_samples()).astype(np.int32)
    crushed = ((samples // step) * step).astype(np.int16)
    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
109
+
110
def apply_auto_gain(audio, target_dB=-20):
    """Apply the gain that moves the level of ``audio`` to ``target_dB`` dBFS.

    Args:
        audio: Segment providing ``dBFS`` and ``apply_gain``.
        target_dB: Desired level in dBFS.

    Returns:
        ``audio`` unchanged when it is silent, otherwise the result of
        ``audio.apply_gain(target_dB - audio.dBFS)``.
    """
    current = audio.dBFS
    if current == float("-inf"):
        # Silent input reports -inf dBFS; the required gain would be
        # infinite, so leave the segment as it is.
        return audio
    return audio.apply_gain(target_dB - current)
113
+
114
def apply_vocal_distortion(audio, intensity=0.3):
    """Add a sine-shaped waveshaping distortion to ``audio``.

    Samples are scaled by 1/32768, shaped with
    ``x + intensity * sin(2 * pi * x)``, clipped to [-1, 1] and scaled back
    to the int16 range. The previous version added the sine term in raw
    sample units, so it changed each sample by at most ``intensity`` of one
    quantisation step and was inaudible.

    Args:
        audio: Segment providing ``get_array_of_samples()``, ``frame_rate``
            and ``channels``.
        intensity: Weight of the sine term relative to full scale.

    Returns:
        The segment rebuilt by ``array_to_audiosegment`` from the distorted
        int16 samples.
    """
    # NOTE(review): like the original, this assumes 16-bit samples - confirm.
    full_scale = float(np.iinfo(np.int16).max)
    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
    normalized = samples / 32768.0
    shaped = normalized + intensity * np.sin(normalized * 2 * np.pi)
    # Clip before the integer cast so overshoot cannot wrap around.
    clipped = np.clip(shaped, -1.0, 1.0)
    distorted = (clipped * full_scale).astype(np.int16)
    return array_to_audiosegment(distorted, audio.frame_rate, channels=audio.channels)
118
+
119
  # === Vocal Isolation Helpers ===
120
  def load_track_local(path, sample_rate, channels=2):
121
  sig, rate = torchaudio.load(path)
 
183
  preset_choices = {
184
  "Default": [],
185
  "Clean Podcast": ["Noise Reduction", "Normalize"],
186
+ "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
187
+ "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
188
+ "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
189
+ "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
190
+ "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
191
+ "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
192
  }
193
 
194
  preset_names = list(preset_choices.keys())
 
246
  "Bass Boost": apply_bass_boost,
247
  "Treble Boost": apply_treble_boost,
248
  "Normalize": apply_normalize,
249
+ "Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
250
+ "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
251
+ "Phaser": lambda x: apply_phaser(x),
252
+ "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
253
+ "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
254
+ "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
255
+ "Vocal Distortion": lambda x: apply_vocal_distortion(x)
256
  }
257
 
258
  effects_to_apply = preset_choices.get(preset_name, selected_effects)
 
355
  if not nonsilent_ranges:
356
  return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
357
 
358
# Keep everything from the start of the first non-silent range to the end of
# the last one. The slice end previously read from the undefined name
# `nonsilent_tracks`, which raised NameError.
trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
359
  out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
360
  trimmed.export(out_path, format="wav")
361
  return out_path
 
369
  mixed.export(out_path, format="wav")
370
  return out_path
371
 
372
+ # === Dummy Voice Cloning Tab – Local Only ===
373
def clone_voice(*args):
    """Placeholder voice-cloning handler.

    Ignores every positional argument and returns a fixed warning message.

    NOTE(review): the tab wiring this function declares an audio file-path
    output, while this returns a message string - confirm how the UI should
    surface it.
    """
    warning = "⚠️ Voice cloning requires local install – use Python 3.9 or below"
    return warning
375
 
 
425
  "Stereo Widening",
426
  "Bass Boost",
427
  "Treble Boost",
428
+ "Normalize",
429
+ "Noise Gate",
430
+ "Limiter",
431
+ "Phaser",
432
+ "Flanger",
433
+ "Bitcrusher",
434
+ "Auto Gain",
435
+ "Vocal Distortion"
436
  ]
437
 
438
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
 
471
  gr.File(label="Upload Multiple Files", file_count="multiple"),
472
  gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
473
  gr.Checkbox(label="Isolate Vocals After Effects"),
474
+ gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
475
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
476
  ],
477
  outputs=[
 
512
  description="Convert voice to text and edit it before exporting again."
513
  )
514
 
515
+ # --- Voice Cloning (Local Only) ---
516
  with gr.Tab("🎭 Voice Cloning (Local Only)"):
517
  gr.Interface(
518
  fn=clone_voice,
 
522
  gr.Textbox(label="Text to Clone", lines=5)
523
  ],
524
  outputs=gr.Audio(label="Cloned Output", type="filepath"),
525
+ title="Replace One Voice With Another",
526
  description="Clone voice from source to target speaker using AI"
527
  )
528