tee342 committed
Commit 94c3b1e · verified · 1 Parent(s): aa065d9

Update app.py
Files changed (1)
  1. app.py +136 -83
app.py CHANGED
@@ -9,6 +9,7 @@ import torch
 from demucs import pretrained
 from demucs.apply import apply_model
 import torchaudio
+from pathlib import Path
 import matplotlib.pyplot as plt
 from io import BytesIO
 from PIL import Image
@@ -18,8 +19,8 @@ import librosa
 import warnings
 from faster_whisper import WhisperModel
 from TTS.api import TTS
-import pickle
 import base64
+import pickle
 
 # Suppress warnings
 warnings.filterwarnings("ignore")
@@ -156,39 +157,47 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
-# === AI Mastering Chain – Genre EQ + Loudness Match + Limiting ===
-def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
-    audio = AudioSegment.from_file(audio_path)
-    eq_audio = auto_eq(audio, genre=genre)
-    samples, sr = audiosegment_to_array(eq_audio)
-
-    # Apply loudness normalization
-    meter = pyln.Meter(sr)
-    loudness = meter.integrated_loudness(samples.astype(np.float64) / 32768.0)
-    gain_db = target_lufs - loudness
-    final_audio = eq_audio + gain_db
-    final_audio = apply_limiter(final_audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-    final_audio.export(out_path, format="wav")
+# === Vocal Isolation Helpers ===
+def load_track_local(path, sample_rate, channels=2):
+    sig, rate = torchaudio.load(path)
+    if rate != sample_rate:
+        sig = torchaudio.functional.resample(sig, rate, sample_rate)
+    if channels == 1:
+        sig = sig.mean(0)
+    return sig
+
+def save_track(path, wav, sample_rate):
+    path = Path(path)
+    torchaudio.save(str(path), wav, sample_rate)
+
+def apply_vocal_isolation(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    ref = wav.mean(0)
+    wav -= ref[:, None]
+    sources = apply_model(model, wav[None])[0]
+    wav += ref[:, None]
+
+    vocal_track = sources[3].cpu()
+    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
+    save_track(out_path, vocal_track, model.samplerate)
     return out_path
 
-# === Harmonic Saturation / Exciter ===
-def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
-    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-
-    if saturation_type == "Tube":
-        saturated = np.tanh(intensity * samples)
-    elif saturation_type == "Tape":
-        saturated = np.where(samples > 0, 1 - np.exp(-intensity * samples), -1 + np.exp(intensity * samples))
-    elif saturation_type == "Console":
-        saturated = np.clip(samples, -32768, 32768) * intensity
-    elif saturation_type == "Mix Bus":
-        saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
-    else:
-        saturated = samples
-
-    return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
+# === Stem Splitting (Drums, Bass, Other, Vocals) – Now Defined! ===
+def stem_split(audio_path):
+    model = pretrained.get_model(name='htdemucs')
+    wav = load_track_local(audio_path, model.samplerate, channels=2)
+    sources = apply_model(model, wav[None])[0]
+
+    output_dir = tempfile.mkdtemp()
+    stem_paths = []
+
+    for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
+        path = os.path.join(output_dir, f"{name}.wav")
+        save_track(path, sources[i].cpu(), model.samplerate)
+        stem_paths.append(gr.File(value=path))
+
+    return stem_paths
 
 # === Process Audio Function ===
 def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
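Review note on `apply_vocal_isolation` above: `wav -= ref[:, None]` subtracts a `[time, 1]` tensor from the `[channels, time]` mix, which PyTorch cannot broadcast, so the call raises at runtime. A minimal corrected sketch using the usual Demucs scalar normalization (assuming the `load_track_local` and `save_track` helpers from this commit):

```python
import os
import tempfile
from demucs import pretrained
from demucs.apply import apply_model

def apply_vocal_isolation_fixed(audio_path):
    model = pretrained.get_model(name='htdemucs')
    wav = load_track_local(audio_path, model.samplerate, channels=2)

    ref = wav.mean(0)                       # mono reference, shape [time]
    wav = (wav - ref.mean()) / ref.std()    # normalize with scalars, not tensors
    sources = apply_model(model, wav[None])[0]
    sources = sources * ref.std() + ref.mean()  # undo the normalization

    vocal_track = sources[3].cpu()          # htdemucs order: drums, bass, other, vocals
    out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
    save_track(out_path, vocal_track, model.samplerate)
    return out_path
```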
@@ -246,7 +255,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
         status = f"❌ Error: {str(e)}"
         return None, None, status, "", status
 
-# === Waveform + Spectrogram Generator ===
+# === Visualize Waveform ===
 def show_waveform(audio_file):
     try:
         audio = AudioSegment.from_file(audio_file)
@@ -301,25 +310,60 @@ preset_choices = {
 
 preset_names = list(preset_choices.keys())
 
-# === Preset Cards Gallery ===
-preset_cards = [
-    ("images/pop_card.png", "Pop"),
-    ("images/edm_card.png", "EDM"),
-    ("images/rock_card.png", "Rock"),
-    ("images/hiphop_card.png", "Hip-Hop"),
-    ("images/acoustic_card.png", "Acoustic"),
-    ("images/stage_mode_card.png", "Stage Mode"),
-    ("images/vocal_distortion_card.png", "Vocal Distortion"),
-    ("images/tube_saturation_card.png", "Tube Saturation")
-]
-
-# === Logo Embedding (Base64 or file) ===
-def get_logo():
-    return "logo.png"
+# === Batch Processing Function ===
+def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
+    status = "🔊 Loading files..."
+    try:
+        output_dir = tempfile.mkdtemp()
+        results = []
+        session_logs = []
+
+        for file in files:
+            processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)
+            results.append(processed_path)
+            session_logs.append(log)
+
+        zip_path = os.path.join(output_dir, "batch_output.zip")
+        with zipfile.ZipFile(zip_path, 'w') as zipf:
+            for i, res in enumerate(results):
+                filename = f"processed_{i}.{export_format.lower()}"
+                zipf.write(res, filename)
+                zipf.writestr(f"session_info_{i}.json", session_logs[i])
+
+        return zip_path, "📦 ZIP created successfully!"
+
+    except Exception as e:
+        return None, f"❌ Batch processing failed: {str(e)}"
+
+# === Vocal Pitch Correction – Auto-Tune Style ===
+def auto_tune_vocal(audio_path, target_key="C"):
+    try:
+        # Placeholder for real-time pitch detection
+        return apply_pitch_shift(AudioSegment.from_file(audio_path), 0.2)
+    except Exception as e:
+        return None
+
+# === Real-Time Spectrum Analyzer + Live EQ Preview ===
+def visualize_spectrum(audio_path):
+    y, sr = torchaudio.load(audio_path)
+    y_np = y.numpy().flatten()
+    stft = librosa.stft(y_np)
+    db = librosa.amplitude_to_db(abs(stft))
+
+    plt.figure(figsize=(10, 4))
+    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
+    plt.colorbar(img, format="%+2.0f dB")
+    plt.title("Frequency Spectrum")
+    plt.tight_layout()
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
 
 # === Main UI ===
 with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
-    gr.HTML(f'<div class="studio-header"><img src="{get_logo()}" width="400" /></div>')
+    gr.HTML('<div class="studio-header"><img src="logo.png" width="400" /></div>')
     gr.Markdown("### Upload, edit, export — powered by AI!")
 
     with gr.Tab("🎵 Single File Studio"):
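Review note on the hunk above: `visualize_spectrum` flattens the `[channels, time]` tensor from `torchaudio.load`, which concatenates the channels of a stereo file back-to-back and doubles the apparent duration; averaging to mono is the usual fix. `librosa.display` also needs an explicit submodule import in many librosa versions, and `batch_process_audio` relies on `zipfile`, which must be imported elsewhere in app.py (not visible in this diff). A hedged sketch of the mono fix:

```python
import librosa
import librosa.display  # explicit import; `import librosa` alone may not expose it
import torchaudio

def load_mono_for_spectrum(audio_path):
    # torchaudio.load returns ([channels, time], sample_rate)
    y, sr = torchaudio.load(audio_path)
    return y.mean(0).numpy(), sr  # average the channels instead of flatten()
```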
@@ -344,6 +388,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             output_audio, waveform_img, session_log_out, genre_out, status_box
         ])
 
+    # --- Remix Mode ---
+    with gr.Tab("🎛 Remix Mode"):
+        gr.Interface(
+            fn=stem_split,
+            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
+            outputs=[
+                gr.File(label="Vocals"),
+                gr.File(label="Drums"),
+                gr.File(label="Bass"),
+                gr.File(label="Other")
+            ],
+            title="Split Into Drums, Bass, Vocals, and More",
+            description="Use AI to separate musical elements like vocals, drums, and bass.",
+            flagging_mode="never",
+            clear_btn=None
+        )
+
     # --- AI Mastering Chain Tab ===
     with gr.Tab("🎧 AI Mastering Chain"):
         gr.Interface(
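Review note: `stem_split` writes and returns the stems in htdemucs source order (drums, bass, other, vocals), but the outputs above are labeled Vocals, Drums, Bass, Other, so the downloaded files end up mislabeled. Reordering the labels to match the return order would fix it:

```python
outputs=[
    gr.File(label="Drums"),
    gr.File(label="Bass"),
    gr.File(label="Other"),
    gr.File(label="Vocals")
],
```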
@@ -376,9 +437,19 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     # --- Preset Cards Gallery ===
     with gr.Tab("🎛 Preset Gallery"):
         gr.Markdown("### Select a preset visually")
-        preset_gallery = gr.Gallery(value=preset_cards, label="Preset Cards", columns=4, height="auto")
+        preset_gallery = gr.Gallery(value=[
+            ("images/pop_card.png", "Pop"),
+            ("images/edm_card.png", "EDM"),
+            ("images/rock_card.png", "Rock"),
+            ("images/hiphop_card.png", "Hip-Hop"),
+            ("images/acoustic_card.png", "Acoustic"),
+            ("images/stage_mode_card.png", "Stage Mode"),
+            ("images/vocal_distortion_card.png", "Vocal Distortion"),
+            ("images/tube_saturation_card.png", "Tube Saturation")
+        ], label="Preset Cards", columns=4, height="auto")
+
         preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
-        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices.keys())[0:], label="Effects")
+        preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Effects")
 
         def load_preset_by_card(evt: gr.SelectData):
             index = evt.index % len(preset_names)
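Review note: `list(preset_choices["Default"])` assumes preset_choices contains a "Default" key, which this diff does not show. A fallback that cannot raise KeyError (hypothetical replacement, not in the commit):

```python
# Use the "Default" entry if present, otherwise the first preset's effect list.
default_effects = preset_choices.get("Default", next(iter(preset_choices.values())))
preset_effects_out = gr.CheckboxGroup(choices=list(default_effects), label="Effects")
```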
@@ -397,23 +468,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Enhance vocals with doubling or harmony"
         )
 
-    # --- Remix Mode ---
-    with gr.Tab("🎛 Remix Mode"):
-        gr.Interface(
-            fn=stem_split,
-            inputs=gr.Audio(label="Upload Music Track", type="filepath"),
-            outputs=[
-                gr.File(label="Vocals"),
-                gr.File(label="Drums"),
-                gr.File(label="Bass"),
-                gr.File(label="Other")
-            ],
-            title="Split Into Drums, Bass, Vocals, and More",
-            description="Use AI to separate musical elements like vocals, drums, and bass.",
-            flagging_mode="never",
-            clear_btn=None
-        )
-
     # --- Batch Processing ---
     with gr.Tab("🔊 Batch Processing"):
         gr.Interface(
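(The block removed here is the Remix Mode tab, relocated to just after the Single File Studio tab in the +388 hunk above.)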
@@ -450,23 +504,6 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
-    def visualize_spectrum(audio_path):
-        y, sr = torchaudio.load(audio_path)
-        y_np = y.numpy().flatten()
-        stft = librosa.stft(y_np)
-        db = librosa.amplitude_to_db(abs(stft))
-
-        plt.figure(figsize=(10, 4))
-        img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-        plt.colorbar(img, format="%+2.0f dB")
-        plt.title("Frequency Spectrum")
-        plt.tight_layout()
-        buf = BytesIO()
-        plt.savefig(buf, format="png")
-        plt.close()
-        buf.seek(0)
-        return Image.open(buf)
-
     with gr.Tab("📊 Frequency Spectrum"):
         gr.Interface(
             fn=visualize_spectrum,
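(Likewise, the `visualize_spectrum` definition removed here was moved to module level in the +310 hunk, so it is defined before the Frequency Spectrum tab references it.)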
@@ -490,6 +527,22 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
         )
 
     # --- Save/Load Mix Session (.aiproj) ===
+    def save_project(audio, preset, effects):
+        project_data = {
+            "audio": AudioSegment.from_file(audio).raw_data,
+            "preset": preset,
+            "effects": effects
+        }
+        out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
+        with open(out_path, "wb") as f:
+            pickle.dump(project_data, f)
+        return out_path
+
+    def load_project(project_file):
+        with open(project_file.name, "rb") as f:
+            data = pickle.load(f)
+        return data["preset"], data["effects"]
+
     with gr.Tab("📁 Save/Load Project"):
         gr.Interface(
             fn=save_project,
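Review note on `save_project`/`load_project` above: `pickle.load` on a user-supplied .aiproj file can execute arbitrary code, and storing `raw_data` alone drops the sample rate and channel layout needed to reconstruct the audio. A pickle-free sketch (assumption: presets and effects are JSON-serializable, and the audio is referenced by path rather than embedded):

```python
import json
import os
import tempfile

def save_project_json(audio_path, preset, effects):
    # Store a reference to the source file instead of raw sample bytes.
    out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
    with open(out_path, "w") as f:
        json.dump({"audio_path": audio_path, "preset": preset, "effects": effects}, f)
    return out_path

def load_project_json(project_file):
    with open(project_file.name) as f:
        data = json.load(f)
    return data["preset"], data["effects"]
```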