tee342 committed
Commit 8a18fa3 · verified · 1 Parent(s): 325283b

Update app.py

Files changed (1):
  1. app.py +121 -338
app.py CHANGED
@@ -5,12 +5,10 @@ import numpy as np
 import tempfile
 import os
 import noisereduce as nr
-import json
 import torch
 from demucs import pretrained
 from demucs.apply import apply_model
 import torchaudio
-from pathlib import Path
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
@@ -19,9 +17,8 @@ import datetime
 import librosa
 import warnings
 from faster_whisper import WhisperModel
-from mutagen.mp3 import MP3
-from mutagen.id3 import ID3, TIT2, TPE1, TALB, TYER
 from TTS.api import TTS
+import base64
 import pickle
 
 # Suppress warnings
@@ -58,17 +55,8 @@ def apply_reverb(audio):
 def apply_pitch_shift(audio, semitones=-2):
     new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
     samples = np.array(audio.get_array_of_samples())
-    resampled = np.interp(
-        np.arange(0, len(samples), 2 ** (semitones / 12)),
-        np.arange(len(samples)),
-        samples
-    ).astype(np.int16)
-    return AudioSegment(
-        resampled.tobytes(),
-        frame_rate=new_frame_rate,
-        sample_width=audio.sample_width,
-        channels=audio.channels
-    )
+    resampled = np.interp(np.arange(0, len(samples), 2 ** (semitones / 12)), np.arange(len(samples)), samples).astype(np.int16)
+    return AudioSegment(resampled.tobytes(), frame_rate=new_frame_rate, sample_width=audio.sample_width, channels=audio.channels)
 
 def apply_echo(audio, delay_ms=500, decay=0.5):
     echo = audio - 10
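For reference, `2 ** (semitones / 12)` is the equal-temperament frequency ratio (≈0.891 for -2 semitones). A minimal sketch of the same speed-change idea using pydub's `_spawn` idiom, with a synthetic test tone; this is an illustration under those assumptions, not the committed implementation:

from pydub.generators import Sine

def pitch_shift_spawn(audio, semitones=-2):
    # Each semitone is a frequency factor of 2**(1/12) in equal temperament.
    ratio = 2 ** (semitones / 12)
    # Relabel the frame rate (shifts pitch and speed together), then resample
    # back to the original rate so downstream code sees a normal segment.
    shifted = audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * ratio)})
    return shifted.set_frame_rate(audio.frame_rate)

tone = Sine(440).to_audio_segment(duration=1000)  # 1 s A4 test tone
down_two = pitch_shift_spawn(tone, -2)            # ≈392 Hz, i.e. roughly G4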
@@ -138,41 +126,17 @@ def match_loudness(audio_path, target_lufs=-14.0):
     adjusted.export(out_path, format="wav")
     return out_path
 
-# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
-def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
-    audio = AudioSegment.from_file(audio_path)
-
-    # Apply Genre EQ
-    eq_audio = auto_eq(audio, genre=genre)
-
-    # Convert to numpy for loudness
-    samples, sr = audiosegment_to_array(eq_audio)
-
-    # Apply loudness normalization
-    meter = pyln.Meter(sr)
-    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
-    gain_db = target_lufs - loudness
-    final_audio = eq_audio + gain_db
-
-    # Apply final limiting
-    final_audio = apply_limiter(final_audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-    final_audio.export(out_path, format="wav")
-    return out_path
-
 # === Auto-EQ per Genre ===
 def auto_eq(audio, genre="Pop"):
     eq_map = {
-        "Pop": [(200, 500, -3), (2000, 4000, +4)],  # Cut muddiness, boost vocals
-        "EDM": [(60, 250, +6), (8000, 12000, +3)],  # Maximize bass & sparkle
-        "Rock": [(1000, 3000, +4), (7000, 10000, -3)],  # Punchy mids, reduce sibilance
-        "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],  # Deep lows, smooth highs
-        "Acoustic": [(100, 300, -3), (4000, 8000, +2)],  # Natural tone
-        "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],  # Clear low-mids, crisp highs
-        "Trap": [(80, 120, +6), (3000, 6000, -4)],  # Sub-bass boost, cut harsh highs
-        "LoFi": [(20, 200, +3), (1000, 3000, -2)],  # Warmth, soft mids
-        "Default": []
+        "Pop": [(200, 500, -3), (2000, 4000, +4)],
+        "EDM": [(60, 250, +6), (8000, 12000, +3)],
+        "Rock": [(1000, 3000, +4), (7000, 10000, -3)],
+        "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],
+        "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
+        "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
+        "Trap": [(80, 120, +6), (3000, 6000, -4)],
+        "LoFi": [(20, 200, +3), (1000, 3000, -2)]
     }
 
     from scipy.signal import butter, sosfilt
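Each eq_map entry is a (low Hz, high Hz, gain dB) band. One plausible realization with the imported butter/sosfilt, which isolates the band, scales it, and adds the difference back onto the dry signal, assuming float samples; the test signal here is synthetic:

import numpy as np
from scipy.signal import butter, sosfilt

def apply_band_gain(samples, sr, low_hz, high_hz, gain_db):
    # A 2nd-order Butterworth bandpass isolates the band; adding the scaled
    # band back onto the dry signal boosts it (or cuts it, for negative gain).
    sos = butter(2, [low_hz, high_hz], btype="band", fs=sr, output="sos")
    band = sosfilt(sos, samples.astype(np.float64))
    return samples + band * (10 ** (gain_db / 20) - 1.0)

sr = 44100
t = np.arange(sr) / sr
sig = np.sin(2 * np.pi * 100 * t) + np.sin(2 * np.pi * 3000 * t)
eq = apply_band_gain(sig, sr, 2000, 4000, +4)  # the "Pop" vocal-presence boost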
@@ -191,6 +155,23 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
+# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
+def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
+    audio = AudioSegment.from_file(audio_path)
+    eq_audio = auto_eq(audio, genre=genre)
+    samples, sr = audiosegment_to_array(eq_audio)
+
+    # Apply loudness normalization
+    meter = pyln.Meter(sr)
+    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
+    gain_db = target_lufs - loudness
+    final_audio = eq_audio + gain_db
+    final_audio = apply_limiter(final_audio)
+
+    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
+    final_audio.export(out_path, format="wav")
+    return out_path
+
 # === Harmonic Saturation / Exciter ===
 def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
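The division by 32768.0 converts int16 samples into the float range pyloudnorm expects. A standalone sketch of the same LUFS gain computation on synthetic audio (pyloudnorm implements an ITU-R BS.1770 meter):

import numpy as np
import pyloudnorm as pyln

sr = 48000
data = 0.1 * np.random.randn(sr * 3)        # 3 s of float audio, roughly in [-1, 1]

meter = pyln.Meter(sr)                      # BS.1770 integrated loudness meter
loudness = meter.integrated_loudness(data)  # measured LUFS
gain_db = -14.0 - loudness                  # gain needed to reach the -14 LUFS target
matched = data * 10 ** (gain_db / 20)       # apply as a linear factor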
@@ -208,81 +189,6 @@ def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
 
     return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
 
-# === Vocal Isolation Helpers ===
-def load_track_local(path, sample_rate, channels=2):
-    sig, rate = torchaudio.load(path)
-    if rate != sample_rate:
-        sig = torchaudio.functional.resample(sig, rate, sample_rate)
-    if channels == 1:
-        sig = sig.mean(0)
-    return sig
-
-def save_track(path, wav, sample_rate):
-    path = Path(path)
-    torchaudio.save(str(path), wav, sample_rate)
-
-def apply_vocal_isolation(audio_path):
-    model = pretrained.get_model(name='htdemucs')
-    wav = load_track_local(audio_path, model.samplerate, channels=2)
-    ref = wav.mean(0)
-    wav -= ref[:, None]
-    sources = apply_model(model, wav[None])[0]
-    wav += ref[:, None]
-
-    vocal_track = sources[3].cpu()
-    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
-    save_track(out_path, vocal_track, model.samplerate)
-    return out_path
-
-# === Stem Splitting (Drums, Bass, Other, Vocals) ===
-def stem_split(audio_path):
-    model = pretrained.get_model(name='htdemucs')
-    wav = load_track_local(audio_path, model.samplerate, channels=2)
-    sources = apply_model(model, wav[None])[0]
-
-    output_dir = tempfile.mkdtemp()
-    stem_paths = []
-
-    for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
-        path = os.path.join(output_dir, f"{name}.wav")
-        save_track(path, sources[i].cpu(), model.samplerate)
-        stem_paths.append(gr.File(value=path))
-
-    return stem_paths
-
-# === Save/Load Project File (.aiproj) ===
-def save_project(vocals, drums, bass, other, vol_vocals, vol_drums, vol_bass, vol_other):
-    project_data = {
-        "vocals": AudioSegment.from_file(vocals).raw_data,
-        "drums": AudioSegment.from_file(drums).raw_data,
-        "bass": AudioSegment.from_file(bass).raw_data,
-        "other": AudioSegment.from_file(other).raw_data,
-        "volumes": {
-            "vocals": vol_vocals,
-            "drums": vol_drums,
-            "bass": vol_bass,
-            "other": vol_other
-        }
-    }
-    out_path = os.path.join(tempfile.gettempdir(), "mix_session.aiproj")
-    with open(out_path, "wb") as f:
-        pickle.dump(project_data, f)
-    return out_path
-
-def load_project(project_file):
-    with open(project_file.name, "rb") as f:
-        data = pickle.load(f)
-    return (
-        array_to_audiosegment(data["vocals"], 44100),
-        array_to_audiosegment(data["drums"], 44100),
-        array_to_audiosegment(data["bass"], 44100),
-        array_to_audiosegment(data["other"], 44100),
-        data["volumes"]["vocals"],
-        data["volumes"]["drums"],
-        data["volumes"]["bass"],
-        data["volumes"]["other"]
-    )
-
 # === Process Audio Function ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
     status = "🔊 Loading audio..."
@@ -302,7 +208,6 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         "Normalize": apply_normalize,
         "Noise Gate": lambda x: apply_noise_gate(x, threshold=-50.0),
         "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
-        "Phaser": lambda x: apply_phaser(x),
         "Flanger": lambda x: apply_phaser(x, rate=1.2, depth=0.9, mix=0.7),
         "Bitcrusher": lambda x: apply_bitcrush(x, bit_depth=8),
         "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
@@ -340,7 +245,7 @@
         status = f"❌ Error: {str(e)}"
         return None, None, status, "", status
 
-# === Waveform + Spectrogram Generator ===
+# === Visualize Waveform ===
 def show_waveform(audio_file):
     try:
         audio = AudioSegment.from_file(audio_file)
@@ -364,7 +269,6 @@ def detect_genre(audio_path):
     except Exception:
         return "Unknown"
 
-# === Session Info Export ===
 def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
     log = {
         "timestamp": str(datetime.datetime.now()),
@@ -396,34 +300,50 @@ preset_choices = {
 
 preset_names = list(preset_choices.keys())
 
+# === Preset Cards Gallery ===
+def get_preset_cards():
+    card_paths = []
+    for name in preset_names:
+        card_paths.append(f"https://via.placeholder.com/150x100?text={name}")
+    return card_paths
+
+# === Load Preset by Name ===
+def load_preset_by_card(name_index):
+    name = preset_names[name_index]
+    return name, preset_choices[name]
+
+# === Logo Embedding (Base64 or file) ===
+def get_logo():
+    try:
+        with open("logo.png", "rb") as img_file:
+            return "data:image/png;base64," + base64.b64encode(img_file.read()).decode()
+    except FileNotFoundError:
+        return "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
+
 # === Main UI ===
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.Markdown("## 🎧 Ultimate AI Audio Studio\nUpload, edit, export — powered by AI!")
+    gr.HTML(f'<div class="studio-header"><img src="{get_logo()}" width="400" /></div>')
+    gr.Markdown("### Upload, edit, export — powered by AI!")
 
-    # --- Single File Studio ---
     with gr.Tab("🎵 Single File Studio"):
-        gr.Interface(
-            fn=process_audio,
-            inputs=[
-                gr.Audio(label="Upload Audio", type="filepath"),
-                gr.CheckboxGroup(choices=preset_choices.get("Default", []), label="Apply Effects in Order"),
-                gr.Checkbox(label="Isolate Vocals After Effects"),
-                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
-                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-            ],
-            outputs=[
-                gr.Audio(label="Processed Audio", type="filepath"),
-                gr.Image(label="Waveform Preview"),
-                gr.Textbox(label="Session Log (JSON)", lines=5),
-                gr.Textbox(label="Detected Genre", lines=1),
-                gr.Textbox(label="Status", value="✅ Ready", lines=1)
-            ],
-            title="Edit One File at a Time",
-            description="Apply effects, preview waveform, and get full session log.",
-            flagging_mode="never",
-            submit_btn="Process Audio",
-            clear_btn=None
-        )
+        with gr.Row():
+            with gr.Column(min_width=300):
+                input_audio = gr.Audio(label="Upload Audio", type="filepath")
+                effect_checkbox = gr.CheckboxGroup(choices=preset_choices.get("Default", []),
+                                                   label="Apply Effects in Order")
+                preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
+                export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+                isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
+                submit_btn = gr.Button("Process Audio")
+            with gr.Column(min_width=300):
+                output_audio = gr.Audio(label="Processed Audio", type="filepath")
+                waveform_img = gr.Image(label="Waveform Preview")
+                genre_out = gr.Textbox(label="Detected Genre")
+                status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
+
+        submit_btn.click(fn=process_audio, inputs=[
+            input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
+        ], outputs=[output_audio, waveform_img, _, genre_out, status_box])
 
     # --- AI Mastering Chain Tab ===
     with gr.Tab("🎧 AI Mastering Chain"):
@@ -454,6 +374,44 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         description="Enhance clarity and presence using saturation styles like Tube or Tape."
     )
 
+    # --- Preset Cards Gallery ===
+    with gr.Tab("🎛 Preset Gallery"):
+        gr.Markdown("### Select a preset visually")
+
+        preset_images = [
+            ("https://via.placeholder.com/150x100?text=Pop", "Pop"),
+            ("https://via.placeholder.com/150x100?text=EDM", "EDM"),
+            ("https://via.placeholder.com/150x100?text=Rock", "Rock"),
+            ("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
+            ("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
+            ("https://via.placeholder.com/150x100?text=Tube+Saturation", "Tube"),
+            ("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
+            ("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
+        ]
+
+        preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
+        preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
+        preset_effects_out = gr.CheckboxGroup(choices=[e for e in preset_choices["Default"]], label="Effects")
+
+        def select_preset(evt: gr.SelectData):
+            selected = evt.index
+            name = preset_names[selected % len(preset_names)]
+            effects = preset_choices.get(name, [])
+            return name, effects
+
+        preset_gallery.select(fn=select_preset, inputs=[], outputs=[preset_name_out, preset_effects_out])
+
+    # --- Vocal Doubler / Harmonizer ===
+    with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
+        gr.Interface(
+            fn=lambda x: apply_harmony(x),
+            inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
+            outputs=gr.Audio(label="Doubled Output", type="filepath"),
+            title="Add Vocal Doubling / Harmony",
+            description="Enhance vocals with doubling or harmony",
+            allow_flagging="never"
+        )
+
     # --- Remix Mode ---
    with gr.Tab("🎛 Remix Mode"):
        gr.Interface(
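The gallery wiring above hinges on gr.SelectData: the select handler receives the flat index of the clicked card, and the modulo guards against the card list being longer than preset_names. A self-contained toy version of the same pattern (names shortened for the example):

import gradio as gr

names = ["Pop", "EDM", "Rock"]

with gr.Blocks() as demo_gallery:
    gallery = gr.Gallery(
        value=[(f"https://via.placeholder.com/150x100?text={n}", n) for n in names],
        label="Cards", columns=3)
    picked = gr.Textbox(label="Picked preset")

    def on_select(evt: gr.SelectData):
        # evt.index is the flat position of the clicked card in the gallery
        return names[evt.index % len(names)]

    gallery.select(fn=on_select, inputs=[], outputs=picked)

demo_gallery.launch()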
@@ -506,37 +464,14 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Correct vocal pitch automatically"
         )
 
-    # --- Create Karaoke Video from Audio + Lyrics ===
-    with gr.Tab("📹 Create Karaoke Video"):
-        gr.Interface(
-            fn=create_karaoke_video,
-            inputs=[
-                gr.Audio(label="Upload Track", type="filepath"),
-                gr.Textbox(label="Lyrics", lines=10),
-                gr.File(label="Background (Optional)")
-            ],
-            outputs=gr.Video(label="Karaoke Video"),
-            title="Make Karaoke Videos from Audio + Lyrics",
-            description="Generate karaoke-style videos with real-time sync."
-        )
-
-    # --- Vocal Doubler / Harmonizer ===
-    with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
-        gr.Interface(
-            fn=vocal_doubler,
-            inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
-            outputs=gr.Audio(label="Doubled Output", type="filepath"),
-            title="Add Vocal Doubling / Harmony",
-            description="Enhance vocals with doubling or harmony"
-        )
-
     # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
-    with gr.Tab("📊 Real-Time Spectrum"):
+    with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
             inputs=gr.Audio(label="Upload Track", type="filepath"),
             outputs=gr.Image(label="Spectrum Analysis"),
-            title="See the frequency breakdown of your audio"
+            title="Real-Time Spectrum Analyzer",
+            description="See the frequency breakdown of your audio"
         )
 
     # --- Loudness Graph Tab ---
@@ -567,40 +502,28 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     )
 
     # --- Save/Load Mix Session (.aiproj) ===
-    with gr.Tab("📁 Save/Load Mix Session"):
+    with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
             fn=save_project,
             inputs=[
-                gr.File(label="Vocals"),
-                gr.File(label="Drums"),
-                gr.File(label="Bass"),
-                gr.File(label="Other"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Vocals Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Drums Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Bass Volume"),
-                gr.Slider(minimum=-10, maximum=10, value=0, label="Other Volume")
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Applied Effects")
             ],
             outputs=gr.File(label="Project File (.aiproj)"),
-            title="Save Your Full Mix Session",
-            description="Save stems, volumes, and settings in one file."
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
         )
 
         gr.Interface(
             fn=load_project,
             inputs=gr.File(label="Upload .aiproj File"),
             outputs=[
-                gr.File(label="Vocals"),
-                gr.File(label="Drums"),
-                gr.File(label="Bass"),
-                gr.File(label="Other"),
-                gr.Slider(label="Vocals Volume"),
-                gr.Slider(label="Drums Volume"),
-                gr.Slider(label="Bass Volume"),
-                gr.Slider(label="Other Volume")
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Loaded Effects")
             ],
-            title="Resume Last Mix",
-            description="Load saved mix session",
-            allow_flagging="never"
+            title="Resume Last Project",
+            description="Load your saved session"
         )
 
     # --- Prompt-Based Editing Tab ===
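The .aiproj inputs change here from four stems plus volume sliders to a single file plus preset metadata, but the matching save_project/load_project bodies fall outside these hunks (the old pickle-based pair is deleted further down). One plausible shape for the new pair, hypothetical throughout:

import os
import pickle
import tempfile

def save_project(audio_file, preset_name, effects):
    # Hypothetical body matching the new three-input form: store the source
    # path plus preset/effect metadata instead of raw stem audio.
    data = {
        "audio": audio_file.name if audio_file else None,
        "preset": preset_name,
        "effects": list(effects or []),
    }
    out_path = os.path.join(tempfile.gettempdir(), "session.aiproj")
    with open(out_path, "wb") as f:
        pickle.dump(data, f)
    return out_path

def load_project(project_file):
    # Hypothetical counterpart returning (preset, effects) for the two outputs.
    with open(project_file.name, "rb") as f:
        data = pickle.load(f)
    return data["preset"], data["effects"]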
@@ -651,144 +574,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             allow_flagging="never"
         )
 
-    # --- Vocal Pitch Correction – Auto-Tune Style ===
-    def auto_tune_vocal(audio_path, target_key="C"):
-        try:
-            # Placeholder for real-time pitch detection
-            return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
-        except Exception as e:
-            return None
-
-    # --- Create Karaoke Video from Audio + Lyrics ===
-    def create_karaoke_video(audio_path, lyrics, bg_image=None):
-        try:
-            from moviepy.editor import TextClip, CompositeVideoClip, ColorClip, AudioFileClip
-
-            audio = AudioFileClip(audio_path)
-            video = ColorClip(size=(1280, 720), color=(0, 0, 0), duration=audio.duration_seconds)
-            words = [(word.strip(), i * 3, (i+1)*3) for i, word in enumerate(lyrics.split())]
-
-            text_clips = [
-                TextClip(word, fontsize=60, color='white').set_position('center').set_duration(end - start).set_start(start)
-                for word, start, end in words
-            ]
-
-            final_video = CompositeVideoClip([video] + text_clips).set_audio(audio)
-            out_path = os.path.join(tempfile.gettempdir(), "karaoke.mp4")
-            final_video.write_videofile(out_path, codec="libx264", audio_codec="aac")
-            return out_path
-        except Exception as e:
-            return f"⚠️ Failed: {str(e)}"
-
-    # --- Vocal Doubler / Harmonizer ===
-    def vocal_doubler(audio):
-        shifted_up = apply_pitch_shift(audio, 0.3)
-        shifted_down = apply_pitch_shift(audio, -0.3)
-        return audio.overlay(shifted_up).overlay(shifted_down)
-
-    # --- AI Suggest Preset Based on Genre ===
-    def suggest_preset_by_genre(audio_path):
-        try:
-            y, sr = torchaudio.load(audio_path)
-            mfccs = librosa.feature.mfcc(y=y.numpy().flatten(), sr=sr, n_mfcc=13).mean(axis=1).reshape(1, -1)
-            return ["Vocal Clarity", "Limiter", "Stereo Expansion"]
-        except Exception:
-            return ["Default"]
-
-    # --- AI Suggest Preset Based on Genre ===
-    with gr.Tab("🧠 AI Suggest Preset"):
-        gr.Interface(
-            fn=suggest_preset_by_genre,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
-            title="Let AI Recommend Best Preset",
-            description="Upload a track and let AI recommend the best preset based on genre."
-        )
-
-    # --- Prompt-Based Editing ===
-    def process_prompt(audio_path, prompt):
-        audio = AudioSegment.from_file(audio_path)
-
-        if "noise" in prompt.lower() or "clean" in prompt.lower():
-            audio = apply_noise_reduction(audio)
-
-        if "normalize" in prompt.lower() or "loud" in prompt.lower():
-            audio = apply_normalize(audio)
-
-        if "bass" in prompt.lower() and ("boost" in prompt.lower()):
-            audio = apply_bass_boost(audio)
-
-        if "treble" in prompt.lower() or "high" in prompt.lower():
-            audio = apply_treble_boost(audio)
-
-        if "echo" in prompt.lower() or "reverb" in prompt.lower():
-            audio = apply_reverb(audio)
-
-        if "pitch" in prompt.lower() and "correct" in prompt.lower():
-            audio = apply_pitch_correction(audio)
-
-        if "harmony" in prompt.lower() or "double" in prompt.lower():
-            audio = apply_harmony(audio)
-
-        out_path = os.path.join(tempfile.gettempdir(), "prompt_output.wav")
-        audio.export(out_path, format="wav")
-        return out_path
-
-    # --- Prompt-Based Editing Tab ===
-    with gr.Tab("🧠 Prompt-Based Editing"):
-        gr.Interface(
-            fn=process_prompt,
-            inputs=[
-                gr.File(label="Upload Audio", type="filepath"),
-                gr.Textbox(label="Describe What You Want", lines=5)
-            ],
-            outputs=gr.Audio(label="Edited Output", type="filepath"),
-            title="Type Your Edits – AI Does the Rest",
-            description="Say what you want done and let AI handle it.",
-            allow_flagging="never"
-        )
-
-    # --- Vocal Pitch Correction (Auto-Tune) ===
-    def apply_pitch_correction(audio, target_key="C"):
-        return apply_pitch_shift(audio, 0.2)
-
-    with gr.Tab("🧬 Vocal Pitch Correction"):
-        gr.Interface(
-            fn=auto_tune_vocal,
-            inputs=[
-                gr.File(label="Source Voice Clip"),
-                gr.Textbox(label="Target Key", value="C", lines=1)
-            ],
-            outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
-            title="Auto-Tune Style Pitch Correction",
-            description="Correct vocal pitch automatically"
-        )
-
-    # --- Real-Time Spectrum Analyzer + EQ Preview ===
-    def visualize_spectrum(audio_path):
-        y, sr = torchaudio.load(audio_path)
-        y_np = y.numpy().flatten()
-        stft = librosa.stft(y_np)
-        db = librosa.amplitude_to_db(abs(stft))
-
-        plt.figure(figsize=(10, 4))
-        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-        plt.colorbar(img, format="%+2.0f dB")
-        plt.title("Frequency Spectrum")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf, format="png")
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-
-    with gr.Tab("📊 Frequency Spectrum"):
-        gr.Interface(
-            fn=visualize_spectrum,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Image(label="Spectrum Analysis"),
-            title="Real-Time Spectrum Analyzer",
-            description="See the frequency breakdown of your audio"
-        )
-
 demo.launch()
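Two of the deleted definitions are still referenced by tabs that survive the commit: visualize_spectrum (the Frequency Spectrum tab) and apply_harmony (the new Vocal Doubler lambda). If they are not redefined elsewhere in the updated app.py, something like the deleted spectrum helper has to come back before the UI is built; a sketch that follows the deleted code, with its imports made explicit:

import librosa
import librosa.display  # explicit import; older librosa does not expose it lazily
import matplotlib.pyplot as plt
import torchaudio
from io import BytesIO
from PIL import Image

def visualize_spectrum(audio_path):
    # Same approach as the deleted helper: STFT magnitude in dB, rendered
    # to a PNG in memory and returned as a PIL image.
    y, sr = torchaudio.load(audio_path)
    db = librosa.amplitude_to_db(abs(librosa.stft(y.numpy().flatten())))
    plt.figure(figsize=(10, 4))
    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
    plt.colorbar(img, format="%+2.0f dB")
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format="png")
    plt.close()
    buf.seek(0)
    return Image.open(buf)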
 