tee342 committed on
Commit
9b24ddd
·
verified ·
1 Parent(s): 651e9be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -10
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from pydub import AudioSegment
 
3
  import numpy as np
4
  import tempfile
5
  import os
@@ -102,9 +103,9 @@ def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
102
  return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
103
 
104
  def apply_bitcrush(audio, bit_depth=8):
105
- samples = np.array(audio.get_array_of_samples())
106
  max_val = np.iinfo(np.int16).max
107
- crushed = (samples // (max_val // (2 ** bit_depth))).astype(np.int16)
108
  return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
109
 
110
  def apply_auto_gain(audio, target_dB=-20):
@@ -116,6 +117,16 @@ def apply_vocal_distortion(audio, intensity=0.3):
116
  distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
117
  return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
118
 
 
 
 
 
 
 
 
 
 
 
119
  # === Vocal Isolation Helpers ===
120
  def load_track_local(path, sample_rate, channels=2):
121
  sig, rate = torchaudio.load(path)
@@ -188,7 +199,15 @@ if not preset_choices:
188
  "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
189
  "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
190
  "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
191
- "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"]
 
 
 
 
 
 
 
 
192
  }
193
 
194
  preset_names = list(preset_choices.keys())
@@ -252,7 +271,9 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
252
  "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
253
  "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
254
  "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
255
- "Vocal Distortion": lambda x: apply_vocal_distortion(x)
 
 
256
  }
257
 
258
  effects_to_apply = preset_choices.get(preset_name, selected_effects)
@@ -317,7 +338,7 @@ def transcribe_audio(audio_path):
317
  text = " ".join([seg.text for seg in segments])
318
  return text
319
 
320
- # === TTS Tab ===
321
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
322
 
323
  def generate_tts(text):
@@ -355,7 +376,7 @@ def detect_silence(audio_file, silence_threshold=-50.0, min_silence_len=1000):
355
  if not nonsilent_ranges:
356
  return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
357
 
358
- trimmed = audio[nonsilent_ranges[0][0]:nonsilent_tracks[-1][1]]
359
  out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
360
  trimmed.export(out_path, format="wav")
361
  return out_path
@@ -369,7 +390,7 @@ def mix_tracks(track1, track2, volume_offset=0):
369
  mixed.export(out_path, format="wav")
370
  return out_path
371
 
372
- # === Dummy Voice Cloning Tab – Works on Local Only ===
373
  def clone_voice(*args):
374
  return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
375
 
@@ -432,7 +453,9 @@ effect_options = [
432
  "Flanger",
433
  "Bitcrusher",
434
  "Auto Gain",
435
- "Vocal Distortion"
 
 
436
  ]
437
 
438
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
@@ -512,6 +535,47 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
512
  description="Convert voice to text and edit it before exporting again."
513
  )
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  # --- Voice Cloning (Local Only) ===
516
  with gr.Tab("🎭 Voice Cloning (Local Only)"):
517
  gr.Interface(
@@ -563,7 +627,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
563
  return None, None, None, None
564
 
565
  with gr.Tab("🧾 Auto-Save & Resume"):
566
- gr.Markdown("Save your current state and resume editing later.")
567
 
568
  action_radio = gr.Radio(["save", "load"], label="Action", value="save")
569
  audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
@@ -622,7 +686,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
622
  )
623
 
624
  # --- Mix Two Tracks ===
625
- with gr.Tab("πŸ”€ Mix Two Tracks"):
626
  gr.Interface(
627
  fn=mix_tracks,
628
  inputs=[
 
1
  import gradio as gr
2
  from pydub import AudioSegment
3
+ from pydub.silence import detect_nonsilent
4
  import numpy as np
5
  import tempfile
6
  import os
 
103
  return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
104
 
105
  def apply_bitcrush(audio, bit_depth=8):
106
+ samples = np.array(audio.get_array_of_samples()).astype(np.float32)
107
  max_val = np.iinfo(np.int16).max
108
+ crushed = ((samples / max_val) * (2 ** bit_depth)).astype(np.int16)
109
  return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
110
 
111
  def apply_auto_gain(audio, target_dB=-20):
 
117
  distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
118
  return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
119
 
120
+ def apply_harmony(audio, shift_semitones=4):
121
+ shifted_up = apply_pitch_shift(audio, shift_semitones)
122
+ shifted_down = apply_pitch_shift(audio, -shift_semitones)
123
+ return audio.overlay(shifted_up).overlay(shifted_down)
124
+
125
+ def apply_stage_mode(audio):
126
+ processed = apply_reverb(audio)
127
+ processed = apply_bass_boost(processed, gain=6)
128
+ return apply_limiter(processed, limit_dB=-2)
129
+
130
  # === Vocal Isolation Helpers ===
131
  def load_track_local(path, sample_rate, channels=2):
132
  sig, rate = torchaudio.load(path)
 
199
  "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
200
  "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
201
  "Voiceover Pro": ["Vocal Isolation", "TTS", "EQ Match"],
202
+ "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
203
+
204
+ # 🎀 Vocalist Presets
205
+ "πŸŽ™ Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
206
+ "πŸ§ͺ Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
207
+ "🎢 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
208
+ "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
209
+ "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
210
+ "🎡 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"]
211
  }
212
 
213
  preset_names = list(preset_choices.keys())
 
271
  "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
272
  "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
273
  "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
274
+ "Vocal Distortion": lambda x: apply_vocal_distortion(x),
275
+ "Harmony": lambda x: apply_harmony(x),
276
+ "Stage Mode": apply_stage_mode
277
  }
278
 
279
  effects_to_apply = preset_choices.get(preset_name, selected_effects)
 
338
  text = " ".join([seg.text for seg in segments])
339
  return text
340
 
341
+ # === TTS Voice Generator ===
342
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
343
 
344
  def generate_tts(text):
 
376
  if not nonsilent_ranges:
377
  return audio.export(os.path.join(tempfile.gettempdir(), "trimmed.wav"), format="wav")
378
 
379
+ trimmed = audio[nonsilent_ranges[0][0]:nonsilent_ranges[-1][1]]
380
  out_path = os.path.join(tempfile.gettempdir(), "trimmed.wav")
381
  trimmed.export(out_path, format="wav")
382
  return out_path
 
390
  mixed.export(out_path, format="wav")
391
  return out_path
392
 
393
+ # === Dummy Voice Cloning Tab – Works Locally Only ===
394
  def clone_voice(*args):
395
  return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
396
 
 
453
  "Flanger",
454
  "Bitcrusher",
455
  "Auto Gain",
456
+ "Vocal Distortion",
457
+ "Harmony",
458
+ "Stage Mode"
459
  ]
460
 
461
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
 
535
  description="Convert voice to text and edit it before exporting again."
536
  )
537
 
538
+ # --- Vocal Presets for Singers ===
539
+ with gr.Tab("🎀 Vocal Presets for Singers"):
540
+ gr.Interface(
541
+ fn=process_audio,
542
+ inputs=[
543
+ gr.Audio(label="Upload Vocal Track", type="filepath"),
544
+ gr.CheckboxGroup(choices=[
545
+ "Noise Reduction",
546
+ "Normalize",
547
+ "Compress Dynamic Range",
548
+ "Bass Boost",
549
+ "Treble Boost",
550
+ "Reverb",
551
+ "Auto Gain",
552
+ "Vocal Distortion",
553
+ "Harmony",
554
+ "Stage Mode"
555
+ ]),
556
+ gr.Checkbox(label="Isolate Vocals After Effects"),
557
+ gr.Dropdown(choices=[
558
+ "πŸŽ™ Clean Vocal",
559
+ "πŸ§ͺ Vocal Distortion",
560
+ "🎢 Singer's Harmony",
561
+ "🌫 ASMR Vocal",
562
+ "🎼 Stage Mode",
563
+ "🎡 Auto-Tune Style"
564
+ ], label="Select Vocal Preset", value="Default"),
565
+ gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
566
+ ],
567
+ outputs=[
568
+ gr.Audio(label="Processed Vocal", type="filepath"),
569
+ gr.Image(label="Waveform Preview"),
570
+ gr.Textbox(label="Session Log (JSON)", lines=5),
571
+ gr.Textbox(label="Detected Genre", lines=1),
572
+ gr.Textbox(label="Status", value="βœ… Ready", lines=1)
573
+ ],
574
+ title="Create Studio-Quality Vocal Tracks",
575
+ description="Apply singer-friendly presets and effects to enhance vocals.",
576
+ allow_flagging="never"
577
+ )
578
+
579
  # --- Voice Cloning (Local Only) ===
580
  with gr.Tab("🎭 Voice Cloning (Local Only)"):
581
  gr.Interface(
 
627
  return None, None, None, None
628
 
629
  with gr.Tab("🧾 Auto-Save & Resume"):
630
+ gr.Markdown("Save your current state and resume later.")
631
 
632
  action_radio = gr.Radio(["save", "load"], label="Action", value="save")
633
  audio_input = gr.Audio(label="Upload or Load Audio", type="filepath")
 
686
  )
687
 
688
  # --- Mix Two Tracks ===
689
+ with gr.Tab(" remix mode"),
690
  gr.Interface(
691
  fn=mix_tracks,
692
  inputs=[