tee342 committed
Commit 7b9755f · verified · 1 Parent(s): 98d55b3

Update app.py

Files changed (1):
  app.py +74 -39
app.py CHANGED
@@ -85,7 +85,7 @@ def apply_bass_boost(audio, gain=10):
 def apply_treble_boost(audio, gain=10):
     return audio.high_pass_filter(4000).apply_gain(gain)
 
-def apply_noise_gate(audio, threshold=-50.0, attack=50, release=100):
+def apply_noise_gate(audio, threshold=-50.0):
     samples = np.array(audio.get_array_of_samples())
     rms = np.sqrt(np.mean(samples**2))
     if rms < 1:
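The new signature keeps a single RMS threshold and drops the unused attack/release parameters; the rest of the gate body sits outside this hunk. A minimal sketch of how an RMS-threshold gate along these lines could be completed with pydub and numpy (the function name and the dBFS conversion are illustrative, not taken from app.py):

    import numpy as np
    from pydub import AudioSegment

    def rms_gate_sketch(audio: AudioSegment, threshold_db: float = -50.0) -> AudioSegment:
        # Overall RMS of the clip, converted to dB relative to full scale.
        samples = np.array(audio.get_array_of_samples()).astype(np.float64)
        rms = np.sqrt(np.mean(samples ** 2))
        if rms < 1:  # effectively digital silence, nothing to gate
            return audio
        rms_db = 20 * np.log10(rms / audio.max_possible_amplitude)
        # Mute the whole clip if it sits below the threshold, otherwise pass it through unchanged.
        if rms_db < threshold_db:
            return AudioSegment.silent(duration=len(audio), frame_rate=audio.frame_rate)
        return audio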
@@ -99,8 +99,8 @@ def apply_limiter(audio, limit_dB=-1):
     limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
     return limiter.apply_gain(limit_dB)
 
-def apply_phaser(audio, rate=0.5, depth=0.7, feedback=0.2, mix=0.5):
-    return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * rate)})
+def apply_phaser(audio):
+    return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * 1.1)})
 
 def apply_bitcrush(audio, bit_depth=8):
     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
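Both versions of apply_phaser are a resampling trick rather than a modulation-based phaser: _spawn reinterprets the same raw bytes at a new frame rate, so pitch and tempo shift together (the new code fixes the factor at 1.1). A small sketch of the same trick with the usual follow-up of resampling back to the original rate (the helper name and the factor parameter are illustrative):

    from pydub import AudioSegment

    def pitch_shift_sketch(audio: AudioSegment, factor: float = 1.1) -> AudioSegment:
        # Reinterpret the raw samples at a higher frame rate: pitch and speed rise together.
        shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * factor)})
        # Resample back to the original rate so exports and later effects see a standard frame rate.
        return shifted.set_frame_rate(audio.frame_rate)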
@@ -127,6 +127,23 @@ def apply_stage_mode(audio):
     processed = apply_bass_boost(processed, gain=6)
     return apply_limiter(processed, limit_dB=-2)
 
+# === Genre Mastering Presets ===
+genre_presets = {
+    "Soul": ["Warmth", "Bass Boost (+6dB)", "Mid Enhance"],
+    "Funk": ["Treble Boost (+6dB)", "Compression", "Stereo Widening"],
+    "Rock": ["Distortion", "Punchy Mids", "Reverb"],
+    "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
+    "Acoustic": ["Natural Reverb", "Gentle Compression", "Mid Focus"],
+    "Dance": ["Loudness Maximizer", "Bass Emphasis", "Stereo Widen"],
+    "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
+    "Country": ["Clean Mix", "Subtle Reverb", "Mid Focus"],
+    "Disco": ["Rhythmic Echo", "Bass Thump", "Treble Boost (+8dB)"],
+    "Metal": ["Distortion", "High Gain", "Crisp Highs"],
+    "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"],
+    "Trap": ["808 Bass", "Reverb", "Lo-Fi Texture"],
+    "LoFi": ["Bitcrusher", "Tape Hiss", "Soft Compression"]
+}
+
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
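genre_presets maps each genre to a list of human-readable effect labels, so something still has to translate those labels into calls to the effect helpers defined earlier in app.py. A hedged sketch of one way that lookup could work (effect_for_label and master_for_genre are illustrative names; the actual wiring is expected to live in process_audio / apply_genre_preset):

    # Illustrative mapping from preset labels to the pydub-based helpers defined above.
    effect_for_label = {
        "Bass Boost (+6dB)": lambda seg: apply_bass_boost(seg, gain=6),
        "Treble Boost (+6dB)": lambda seg: apply_treble_boost(seg, gain=6),
        "Limiter": lambda seg: apply_limiter(seg, limit_dB=-1),
        "Bitcrusher": lambda seg: apply_bitcrush(seg, bit_depth=8),
    }

    def master_for_genre(segment, genre):
        # Apply, in order, every labelled effect we know how to realise; silently skip the rest.
        for label in genre_presets.get(genre, []):
            effect = effect_for_label.get(label)
            if effect is not None:
                segment = effect(segment)
        return segment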
@@ -390,10 +407,49 @@ def mix_tracks(track1, track2, volume_offset=0):
     mixed.export(out_path, format="wav")
     return out_path
 
+# === Genre Mastering Tab ===
+def apply_genre_preset(audio, genre):
+    global preset_choices
+    selected_preset = preset_choices.get(genre, [])
+    return process_audio(audio, selected_preset, False, genre, "WAV")
+
+with gr.Tab("🎧 Genre Mastering"):
+    gr.Markdown("Apply pre-tuned mastering settings for different music genres.")
+
+    genre_dropdown = gr.Dropdown(
+        choices=list(genre_presets.keys()),
+        label="Select Genre",
+        value="Pop"
+    )
+
+    gr.Interface(
+        fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
+        inputs=[
+            gr.Audio(label="Upload Track", type="filepath"),
+            genre_dropdown
+        ],
+        outputs=gr.Audio(label="Mastered Output", type="filepath"),
+        title="Genre-Specific Mastering",
+        description="Apply professionally tuned presets for popular music genres."
+    )
+
 # === Dummy Voice Cloning Tab – Works Locally Only ===
 def clone_voice(*args):
     return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
 
+with gr.Tab("🎭 Voice Cloning (Local Only)"):
+    gr.Interface(
+        fn=clone_voice,
+        inputs=[
+            gr.File(label="Source Voice Clip"),
+            gr.File(label="Target Voice Clip"),
+            gr.Textbox(label="Text to Clone", lines=5)
+        ],
+        outputs=gr.Audio(label="Cloned Output", type="filepath"),
+        title="Replace One Voice With Another",
+        description="Clone voice from source to target speaker using AI"
+    )
+
 # === Speaker Diarization ("Who Spoke When?") ===
 try:
     from pyannote.audio import Pipeline as DiarizationPipeline
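apply_genre_preset looks up preset_choices rather than the new genre_presets dict, and the tab's lambda keeps only element [0] of whatever process_audio returns. Judging from the outputs of the Vocal Presets interface removed later in this commit, that first element is the processed audio path. A sketch of the assumed call chain (master_track is an illustrative name; the 5-tuple shape is inferred from this diff, not confirmed):

    def master_track(audio_path: str, genre: str) -> str:
        # process_audio appears to return (processed_audio, waveform_image, session_log, detected_genre, status).
        processed_audio, _waveform, _log, _genre, _status = apply_genre_preset(audio_path, genre)
        return processed_audio  # the Gradio lambda above does the same thing with [0]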
@@ -417,7 +473,6 @@ def diarize_and_transcribe(audio_path):
     audio.export(temp_wav, format="wav")
 
     try:
-        from pyannote.audio import Pipeline as DiarizationPipeline
         diarization = diarize_pipeline(temp_wav)
 
         result = whisper.transcribe(temp_wav)
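This hunk only drops a redundant re-import inside the try block; diarize_pipeline and whisper still produce the two halves of the "who spoke when" output. A rough sketch of how such results are commonly merged, assuming diarize_pipeline returns a pyannote Annotation and result is a Whisper transcription dict (the helper itself is illustrative; app.py's own merging logic is outside this hunk):

    def label_segments_sketch(diarization, result):
        # Attach Whisper segments to each diarization turn by time overlap.
        lines = []
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            spoken = [seg["text"].strip() for seg in result["segments"]
                      if seg["start"] < turn.end and seg["end"] > turn.start]
            lines.append(f"{speaker} [{turn.start:.1f}s-{turn.end:.1f}s]: {' '.join(spoken)}")
        return "\n".join(lines)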
@@ -525,6 +580,20 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         clear_btn=None
     )
 
+    # --- Genre Mastering Tab ===
+    with gr.Tab("🎧 Genre Mastering"):
+        gr.Interface(
+            fn=lambda audio, genre: apply_genre_preset(audio, genre)[0],
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
+            ],
+            outputs=gr.Audio(label="Mastered Output", type="filepath"),
+            title="Genre-Specific Mastering",
+            description="Apply professionally tuned presets for popular music genres.",
+            allow_flagging="never"
+        )
+
     # --- Transcribe & Edit Tab ===
     with gr.Tab("📝 Transcribe & Edit"):
         gr.Interface(
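Each tab in the Blocks layout wraps a gr.Interface. The same mastering tab could also be wired from explicit components, the usual route when more layout control is needed; a hedged sketch, assuming it is called inside the with gr.Blocks(...) context and that mastering_fn stands in for the lambda used above:

    import gradio as gr

    def build_mastering_tab(mastering_fn):
        # Explicit-component version of the Genre Mastering tab (names here are illustrative).
        with gr.Tab("🎧 Genre Mastering"):
            audio_in = gr.Audio(label="Upload Track", type="filepath")
            genre = gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
            audio_out = gr.Audio(label="Mastered Output", type="filepath")
            master_btn = gr.Button("Master Track")
            master_btn.click(fn=mastering_fn, inputs=[audio_in, genre], outputs=audio_out)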
@@ -535,40 +604,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Convert voice to text and edit it before exporting again."
         )
 
-    # --- Vocal Presets for Singers ===
-    with gr.Tab("🎤 Vocal Presets for Singers"):
-        gr.Interface(
-            fn=process_audio,
-            inputs=[
-                gr.Audio(label="Upload Vocal Track", type="filepath"),
-                gr.CheckboxGroup(choices=[
-                    "Noise Reduction",
-                    "Normalize",
-                    "Compress Dynamic Range",
-                    "Bass Boost",
-                    "Treble Boost",
-                    "Reverb",
-                    "Auto Gain",
-                    "Vocal Distortion",
-                    "Harmony",
-                    "Stage Mode"
-                ]),
-                gr.Checkbox(label="Isolate Vocals After Effects"),
-                gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0] if preset_names else None),
-                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-            ],
-            outputs=[
-                gr.Audio(label="Processed Vocal", type="filepath"),
-                gr.Image(label="Waveform Preview"),
-                gr.Textbox(label="Session Log (JSON)", lines=5),
-                gr.Textbox(label="Detected Genre", lines=1),
-                gr.Textbox(label="Status", value="✅ Ready", lines=1)
-            ],
-            title="Create Studio-Quality Vocal Tracks",
-            description="Apply singer-friendly presets and effects to enhance vocals.",
-            allow_flagging="never"
-        )
-
     # --- Voice Cloning (Local Only) ===
     with gr.Tab("🎭 Voice Cloning (Local Only)"):
         gr.Interface(
  gr.Interface(
@@ -679,7 +714,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
679
  )
680
 
681
  # --- Mix Two Tracks ===
682
- with gr.Tab("πŸ”€ Mix Two Tracks"):
683
  gr.Interface(
684
  fn=mix_tracks,
685
  inputs=[
 