tee342 committed on
Commit
b89a0ee
·
verified ·
1 Parent(s): 2731f65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -27
app.py CHANGED
@@ -20,6 +20,7 @@ from faster_whisper import WhisperModel
20
  from TTS.api import TTS
21
  import base64
22
  import pickle
 
23
 
24
  # Suppress warnings
25
  warnings.filterwarnings("ignore")
@@ -136,7 +137,8 @@ def auto_eq(audio, genre="Pop"):
136
  "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
137
  "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
138
  "Trap": [(80, 120, +6), (3000, 6000, -4)],
139
- "LoFi": [(20, 200, +3), (1000, 3000, -2)]
 
140
  }
141
 
142
  from scipy.signal import butter, sosfilt
@@ -245,7 +247,7 @@ def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, exp
245
  status = f"❌ Error: {str(e)}"
246
  return None, None, status, "", status
247
 
248
- # === Visualize Waveform ===
249
  def show_waveform(audio_file):
250
  try:
251
  audio = AudioSegment.from_file(audio_file)
@@ -308,17 +310,14 @@ def get_preset_cards():
308
  return card_paths
309
 
310
  # === Load Preset by Name ===
311
- def load_preset_by_card(name_index):
312
- name = preset_names[name_index]
 
313
  return name, preset_choices[name]
314
 
315
  # === Logo Embedding (Base64 or file) ===
316
  def get_logo():
317
- try:
318
- with open("logo.png", "rb") as img_file:
319
- return "data:image/png;base64," + base64.b64encode(img_file.read()).decode()
320
- except FileNotFoundError:
321
- return "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
322
 
323
  # === Main UI ===
324
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
@@ -329,8 +328,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
329
  with gr.Row():
330
  with gr.Column(min_width=300):
331
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
332
- effect_checkbox = gr.CheckboxGroup(choices=preset_choices.get("Default", []),
333
- label="Apply Effects in Order")
334
  preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
335
  export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
336
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
@@ -338,12 +336,15 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
338
  with gr.Column(min_width=300):
339
  output_audio = gr.Audio(label="Processed Audio", type="filepath")
340
  waveform_img = gr.Image(label="Waveform Preview")
341
- genre_out = gr.Textbox(label="Detected Genre")
 
342
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
343
 
344
  submit_btn.click(fn=process_audio, inputs=[
345
  input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
346
- ], outputs=[output_audio, waveform_img, _, genre_out, status_box])
 
 
347
 
348
  # --- AI Mastering Chain Tab ===
349
  with gr.Tab("🎧 AI Mastering Chain"):
@@ -384,22 +385,16 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
384
  ("https://via.placeholder.com/150x100?text=Rock", "Rock"),
385
  ("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
386
  ("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
387
- ("https://via.placeholder.com/150x100?text=Tube+Saturation", "Tube"),
388
  ("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
389
  ("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
390
  ]
391
 
392
  preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
393
  preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
394
- preset_effects_out = gr.CheckboxGroup(choices=[e for e in preset_choices["Default"]], label="Effects")
395
-
396
- def select_preset(evt: gr.SelectData):
397
- selected = evt.index
398
- name = preset_names[selected % len(preset_names)]
399
- effects = preset_choices.get(name, [])
400
- return name, effects
401
 
402
- preset_gallery.select(fn=select_preset, inputs=[], outputs=[preset_name_out, preset_effects_out])
403
 
404
  # --- Vocal Doubler / Harmonizer ===
405
  with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
@@ -408,8 +403,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
408
  inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
409
  outputs=gr.Audio(label="Doubled Output", type="filepath"),
410
  title="Add Vocal Doubling / Harmony",
411
- description="Enhance vocals with doubling or harmony",
412
- allow_flagging="never"
413
  )
414
 
415
  # --- Remix Mode ---
@@ -435,7 +429,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
435
  fn=batch_process_audio,
436
  inputs=[
437
  gr.File(label="Upload Multiple Files", file_count="multiple"),
438
- gr.CheckboxGroup(choices=preset_choices.get("Default", []), label="Apply Effects in Order"),
439
  gr.Checkbox(label="Isolate Vocals After Effects"),
440
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
441
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
@@ -465,6 +459,23 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
465
  )
466
 
467
  # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
468
  with gr.Tab("📊 Frequency Spectrum"):
469
  gr.Interface(
470
  fn=visualize_spectrum,
@@ -488,6 +499,10 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
488
  )
489
 
490
  # --- Stereo Imaging Tool ===
 
 
 
 
491
  with gr.Tab("🎚 Stereo Imaging"):
492
  gr.Interface(
493
  fn=stereo_imaging,
@@ -502,13 +517,29 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
502
  )
503
 
504
  # --- Save/Load Mix Session (.aiproj) ===
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  with gr.Tab("📁 Save/Load Project"):
506
  gr.Interface(
507
  fn=save_project,
508
  inputs=[
509
  gr.File(label="Original Audio"),
510
  gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
511
- gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Applied Effects")
512
  ],
513
  outputs=gr.File(label="Project File (.aiproj)"),
514
  title="Save Everything Together",
@@ -520,13 +551,16 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
520
  inputs=gr.File(label="Upload .aiproj File"),
521
  outputs=[
522
  gr.Dropdown(choices=preset_names, label="Loaded Preset"),
523
- gr.CheckboxGroup(choices=[e for e in preset_choices.get("Default", [])], label="Loaded Effects")
524
  ],
525
  title="Resume Last Project",
526
  description="Load your saved session"
527
  )
528
 
529
  # --- Prompt-Based Editing Tab ===
 
 
 
530
  with gr.Tab("🧠 Prompt-Based Editing"):
531
  gr.Interface(
532
  fn=process_prompt,
 
20
  from TTS.api import TTS
21
  import base64
22
  import pickle
23
+ import json
24
 
25
  # Suppress warnings
26
  warnings.filterwarnings("ignore")
 
137
  "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
138
  "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
139
  "Trap": [(80, 120, +6), (3000, 6000, -4)],
140
+ "LoFi": [(20, 200, +3), (1000, 3000, -2)],
141
+ "Default": []
142
  }
143
 
144
  from scipy.signal import butter, sosfilt
 
247
  status = f"❌ Error: {str(e)}"
248
  return None, None, status, "", status
249
 
250
+ # === Waveform + Spectrogram Generator ===
251
  def show_waveform(audio_file):
252
  try:
253
  audio = AudioSegment.from_file(audio_file)
 
310
  return card_paths
311
 
312
  # === Load Preset by Name ===
313
+ def load_preset_by_card(evt: gr.SelectData):
314
+ index = evt.index % len(preset_names)
315
+ name = preset_names[index]
316
  return name, preset_choices[name]
317
 
318
  # === Logo Embedding (Base64 or file) ===
319
  def get_logo():
320
+ return "https://via.placeholder.com/400x100?text=AI+Audio+Studio"
 
 
 
 
321
 
322
  # === Main UI ===
323
  with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
 
328
  with gr.Row():
329
  with gr.Column(min_width=300):
330
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
331
+ effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
 
332
  preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
333
  export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
334
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
 
336
  with gr.Column(min_width=300):
337
  output_audio = gr.Audio(label="Processed Audio", type="filepath")
338
  waveform_img = gr.Image(label="Waveform Preview")
339
+ session_log_out = gr.Textbox(label="Session Log", lines=5)
340
+ genre_out = gr.Textbox(label="Detected Genre", lines=1)
341
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
342
 
343
  submit_btn.click(fn=process_audio, inputs=[
344
  input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
345
+ ], outputs=[
346
+ output_audio, waveform_img, session_log_out, genre_out, status_box
347
+ ])
348
 
349
  # --- AI Mastering Chain Tab ===
350
  with gr.Tab("🎧 AI Mastering Chain"):
 
385
  ("https://via.placeholder.com/150x100?text=Rock", "Rock"),
386
  ("https://via.placeholder.com/150x100?text=Hip-Hop", "Hip-Hop"),
387
  ("https://via.placeholder.com/150x100?text=Acoustic", "Acoustic"),
388
+ ("https://via.placeholder.com/150x100?text=Tube", "Tube"),
389
  ("https://via.placeholder.com/150x100?text=Stage+Mode", "Stage Mode"),
390
  ("https://via.placeholder.com/150x100?text=Vocal+Distortion", "Vocal Distortion")
391
  ]
392
 
393
  preset_gallery = gr.Gallery(value=preset_images, label="Preset Cards", columns=4, height="auto")
394
  preset_name_out = gr.Dropdown(choices=preset_names, label="Selected Preset")
395
+ preset_effects_out = gr.CheckboxGroup(choices=list(preset_choices.keys())[0:], label="Effects")
 
 
 
 
 
 
396
 
397
+ preset_gallery.select(fn=load_preset_by_card, inputs=[], outputs=[preset_name_out, preset_effects_out])
398
 
399
  # --- Vocal Doubler / Harmonizer ===
400
  with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
 
403
  inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
404
  outputs=gr.Audio(label="Doubled Output", type="filepath"),
405
  title="Add Vocal Doubling / Harmony",
406
+ description="Enhance vocals with doubling or harmony"
 
407
  )
408
 
409
  # --- Remix Mode ---
 
429
  fn=batch_process_audio,
430
  inputs=[
431
  gr.File(label="Upload Multiple Files", file_count="multiple"),
432
+ gr.CheckboxGroup(choices=list(preset_choices["Default"]), label="Apply Effects in Order"),
433
  gr.Checkbox(label="Isolate Vocals After Effects"),
434
  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
435
  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
 
459
  )
460
 
461
  # --- Real-Time Spectrum Analyzer + Live EQ Preview ===
462
+ def visualize_spectrum(audio_path):
463
+ y, sr = torchaudio.load(audio_path)
464
+ y_np = y.numpy().flatten()
465
+ stft = librosa.stft(y_np)
466
+ db = librosa.amplitude_to_db(abs(stft))
467
+
468
+ plt.figure(figsize=(10, 4))
469
+ img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
470
+ plt.colorbar(img, format="%+2.0f dB")
471
+ plt.title("Frequency Spectrum")
472
+ plt.tight_layout()
473
+ buf = BytesIO()
474
+ plt.savefig(buf, format="png")
475
+ plt.close()
476
+ buf.seek(0)
477
+ return Image.open(buf)
478
+
479
  with gr.Tab("📊 Frequency Spectrum"):
480
  gr.Interface(
481
  fn=visualize_spectrum,
 
499
  )
500
 
501
  # --- Stereo Imaging Tool ===
502
+ def stereo_imaging(audio, mid_side_balance=0.5, stereo_spread=1.0):
503
+ samples, sr = audiosegment_to_array(AudioSegment.from_file(audio))
504
+ return array_to_audiosegment(samples, sr)
505
+
506
  with gr.Tab("🎚 Stereo Imaging"):
507
  gr.Interface(
508
  fn=stereo_imaging,
 
517
  )
518
 
519
  # --- Save/Load Mix Session (.aiproj) ===
520
+ def save_project(audio, preset, effects):
521
+ project_data = {
522
+ "audio": AudioSegment.from_file(audio).raw_data,
523
+ "preset": preset,
524
+ "effects": effects
525
+ }
526
+ out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
527
+ with open(out_path, "wb") as f:
528
+ pickle.dump(project_data, f)
529
+ return out_path
530
+
531
+ def load_project(project_file):
532
+ with open(project_file.name, "rb") as f:
533
+ data = pickle.load(f)
534
+ return data["preset"], data["effects"]
535
+
536
  with gr.Tab("📁 Save/Load Project"):
537
  gr.Interface(
538
  fn=save_project,
539
  inputs=[
540
  gr.File(label="Original Audio"),
541
  gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
542
+ gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
543
  ],
544
  outputs=gr.File(label="Project File (.aiproj)"),
545
  title="Save Everything Together",
 
551
  inputs=gr.File(label="Upload .aiproj File"),
552
  outputs=[
553
  gr.Dropdown(choices=preset_names, label="Loaded Preset"),
554
+ gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
555
  ],
556
  title="Resume Last Project",
557
  description="Load your saved session"
558
  )
559
 
560
  # --- Prompt-Based Editing Tab ===
561
+ def process_prompt(audio, prompt):
562
+ return apply_noise_reduction(audio)
563
+
564
  with gr.Tab("🧠 Prompt-Based Editing"):
565
  gr.Interface(
566
  fn=process_prompt,