tee342 committed on
Commit f6738b1 · verified · 1 Parent(s): 847e7ea

Update app.py

Files changed (1):
  1. app.py +116 -58

app.py CHANGED
@@ -127,22 +127,97 @@ def apply_stage_mode(audio):
      processed = apply_bass_boost(processed, gain=6)
      return apply_limiter(processed, limit_dB=-2)

- # === Genre Mastering Presets ===
- genre_presets = {
-     "Soul": ["Noise Reduction", "Bass Boost (+6dB)", "Mid Enhance"],
-     "Funk": ["Treble Boost (+6dB)", "Compression", "Stereo Widening"],
-     "Rock": ["Distortion", "Punchy Mids", "Reverb"],
-     "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
-     "Acoustic": ["Natural Reverb", "Gentle Compression", "Mid Focus"],
-     "Dance": ["Loudness Maximizer", "Bass Emphasis", "Stereo Widen"],
-     "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
-     "Country": ["Clean Mix", "Subtle Reverb", "Mid Focus"],
-     "Disco": ["Rhythmic Echo", "Bass Thump", "Treble Boost (+8dB)"],
-     "Metal": ["Distortion", "High Gain", "Crisp Highs"],
-     "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"],
-     "Trap": ["808 Bass", "Reverb", "Lo-Fi Texture"],
-     "LoFi": ["Bitcrusher", "Tape Hiss", "Soft Compression"]
- }
+ # === Auto-EQ per Genre ===
+ def auto_eq(audio, genre="Pop"):
+     # Frequency bands per genre: (low Hz, high Hz, gain in dB)
+     eq_map = {
+         "Pop": [(200, 500, -3), (2000, 4000, +4)],                      # Cut muddiness, boost vocals
+         "EDM": [(60, 250, +6), (8000, 12000, +3)],                      # Maximize bass & sparkle
+         "Rock": [(1000, 3000, +4), (7000, 10000, -3)],                  # Punchy mids, reduce sibilance
+         "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],                  # Deep lows, smooth highs
+         "Acoustic": [(100, 300, -3), (4000, 8000, +2)],                 # Natural tone
+         "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)], # Clear low-mids, crisp highs
+         "Trap": [(80, 120, +6), (3000, 6000, -4)],                      # Sub-bass boost, cut harsh highs
+         "LoFi": [(20, 200, +3), (1000, 3000, -2)],                      # Warmth, soft mids
+         "Default": []
+     }
+
+     from scipy.signal import butter, sosfilt
+
+     def band_eq(samples, sr, lowcut, highcut, gain):
+         sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
+         filtered = sosfilt(sos, samples)
+         # gain is given in dB above, so convert it to a linear factor before blending
+         return samples + (10 ** (gain / 20) - 1) * filtered
+
+     samples, sr = audiosegment_to_array(audio)
+     samples = samples.astype(np.float64)
+
+     for band in eq_map.get(genre, []):
+         low, high, gain = band
+         samples = band_eq(samples, sr, low, high, gain)
+
+     # Clip back into int16 range to avoid wrap-around distortion on loud boosts
+     samples = np.clip(samples, -32768, 32767)
+     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
+
+ # === AI Voice Effects – Harmony / Doubling / Tuning ===
+ def pitch_correct(audio, target_key="C", semitones=None):
+     if semitones is None:
+         # Placeholder: key detection is not implemented yet, so apply no shift
+         semitones = 0
+     return apply_pitch_shift(audio, semitones)
+
+ def vocal_doubling(audio):
+     # Overlay two slightly detuned copies to thicken the vocal
+     double1 = apply_pitch_shift(audio, 0.3)
+     double2 = apply_pitch_shift(audio, -0.3)
+     return audio.overlay(double1).overlay(double2)
+
+ # === Prompt-Based Editing ===
+ def process_prompt(audio_path, prompt):
+     prompt = prompt.lower()
+     audio = AudioSegment.from_file(audio_path)
+
+     if "noise" in prompt or "clean" in prompt:
+         audio = apply_noise_reduction(audio)
+
+     if "normalize" in prompt or "loud" in prompt:
+         audio = apply_normalize(audio)
+
+     if "bass" in prompt and ("boost" in prompt or "up" in prompt):
+         audio = apply_bass_boost(audio)
+
+     if "treble" in prompt or "highs" in prompt:
+         audio = apply_treble_boost(audio)
+
+     if "echo" in prompt or "reverb" in prompt:
+         audio = apply_reverb(audio)
+
+     if "pitch" in prompt and "correct" in prompt:
+         audio = pitch_correct(audio)
+
+     if "harmony" in prompt or "double" in prompt:
+         audio = vocal_doubling(audio)
+
+     out_path = os.path.join(tempfile.gettempdir(), "prompt_output.wav")
+     audio.export(out_path, format="wav")
+     return out_path
+
+ # === Spectrum Analyzer + EQ Visualizer ===
+ def visualize_spectrum(audio_path):
+     y, sr = torchaudio.load(audio_path)
+     # Downmix to mono; flattening a stereo tensor would concatenate the channels
+     y_np = y.mean(dim=0).numpy()
+
+     stft = librosa.stft(y_np)
+     db = librosa.amplitude_to_db(abs(stft))
+
+     plt.figure(figsize=(10, 4))
+     img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")  # needs `import librosa.display`
+     plt.colorbar(img, format="%+2.0f dB")
+     plt.title("Frequency Spectrum")
+     plt.tight_layout()
+     buf = BytesIO()
+     plt.savefig(buf, format="png")
+     plt.close()
+     buf.seek(0)
+     return Image.open(buf)

  # === Vocal Isolation Helpers ===
  def load_track_local(path, sample_rate, channels=2):
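
For readers skimming the hunk above: auto_eq band-passes the signal for each (low, high, gain) entry and mixes the filtered band back in. A minimal standalone sketch of the same idea, runnable on a synthetic tone (band_blend and the test signal are illustrative stand-ins, not code from app.py):

    import numpy as np
    from scipy.signal import butter, sosfilt

    def band_blend(samples, sr, lowcut, highcut, gain_db):
        # Band-pass the region of interest, then add it back scaled by the dB gain
        sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
        band = sosfilt(sos, samples)
        return samples + (10 ** (gain_db / 20) - 1) * band

    sr = 44100
    t = np.arange(sr) / sr
    tone = np.sin(2 * np.pi * 3000 * t)             # 1 s test tone at 3 kHz
    boosted = band_blend(tone, sr, 2000, 4000, +4)  # the "Pop" vocal-presence band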
@@ -358,7 +433,7 @@ def transcribe_audio(audio_path):
  # === TTS Tab ===
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)

  def generate_tts(text):
      out_path = os.path.join(tempfile.gettempdir(), "tts_output.wav")
      tts.tts_to_file(text=text, file_path=out_path)
      return out_path
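
For context, the Coqui TTS object above synthesizes straight to disk; a minimal usage sketch outside Gradio (the sample text and output path are arbitrary):

    from TTS.api import TTS

    tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
    tts.tts_to_file(text="Testing the TTS tab.", file_path="tts_output.wav")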
@@ -407,12 +482,6 @@ def mix_tracks(track1, track2, volume_offset=0):
      mixed.export(out_path, format="wav")
      return out_path

- # === Genre Mastering Tab ===
- def apply_genre_preset(audio, genre):
-     global preset_choices
-     selected_preset = genre_presets.get(genre, [])
-     return process_audio(audio, selected_preset, False, genre, "WAV")[0]
-
  # === Dummy Voice Cloning Tab – Works Locally Only ===
  def clone_voice(*args):
      return "⚠️ Voice cloning requires local install – use Python 3.9 or below"
@@ -440,7 +509,6 @@ def diarize_and_transcribe(audio_path):
      audio.export(temp_wav, format="wav")

      try:
-         from pyannote.audio import Pipeline as DiarizationPipeline
          diarization = diarize_pipeline(temp_wav)

          result = whisper.transcribe(temp_wav)
@@ -517,7 +585,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
                  gr.File(label="Upload Multiple Files", file_count="multiple"),
                  gr.CheckboxGroup(choices=effect_options, label="Apply Effects in Order"),
                  gr.Checkbox(label="Isolate Vocals After Effects"),
                  gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0] if preset_names else None),
                  gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
              ],
              outputs=[
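
The preset dropdown keeps its `if preset_names else None` guard so an empty preset list falls back to no selection instead of raising; the pattern in isolation (preset_names here is a stand-in):

    preset_names = []  # stand-in: no presets discovered on disk
    default = preset_names[0] if preset_names else None  # None rather than IndexError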
@@ -558,17 +626,32 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
              ],
              outputs=gr.Audio(label="Mastered Output", type="filepath"),
              title="Genre-Specific Mastering",
-             description="Apply professionally tuned presets for popular music genres."
+             description="Apply professionally tuned mastering settings for popular music genres."
+         )
+
+     # --- Prompt-Based Editing Tab ===
+     with gr.Tab("🧠 Prompt-Based Editing"):
+         gr.Interface(
+             fn=process_prompt,
+             inputs=[
+                 gr.File(label="Upload Audio", type="filepath"),
+                 gr.Textbox(label="Describe What You Want", lines=5)
+             ],
+             outputs=gr.Audio(label="Edited Output", type="filepath"),
+             title="Type Your Edits – AI Does the Rest",
+             description="Say what you want done and let AI handle it.",
+             allow_flagging="never"
          )

-     # --- Transcribe & Edit Tab ===
-     with gr.Tab("📝 Transcribe & Edit"):
+     # --- Spectrum Analyzer Tab ===
+     with gr.Tab("📊 Frequency Spectrum"):
          gr.Interface(
-             fn=transcribe_audio,
-             inputs=gr.Audio(label="Upload Audio", type="filepath"),
-             outputs=gr.Textbox(label="Transcribed Text", lines=10),
-             title="Transcribe & Edit Spoken Content",
-             description="Convert voice to text and edit it before exporting again."
+             fn=visualize_spectrum,
+             inputs=gr.Audio(label="Upload Track", type="filepath"),
+             outputs=gr.Image(label="Spectrum Analysis"),
+             title="Real-Time Spectrum Analyzer",
+             description="See the frequency breakdown of your audio",
+             allow_flagging="never"
          )

      # --- Voice Cloning (Local Only) ===
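
A usage sketch for the prompt-based editing tab wired in above; the input file name is hypothetical, and the keyword routing follows the process_prompt branches added earlier in this commit:

    edited_path = process_prompt("vocal_take.wav", "clean up the noise and boost the bass")
    # "clean"/"noise" selects apply_noise_reduction, "bass" + "boost" selects
    # apply_bass_boost; the result is written to <tempdir>/prompt_output.wav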
@@ -655,29 +738,4 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
              description="Detect and trim silence at start/end or between words"
          )

-     # --- Load/Save Project File (.aiproj) ===
-     with gr.Tab("📁 Save/Load Project"):
-         gr.Interface(
-             fn=save_project,
-             inputs=[
-                 gr.File(label="Original Audio"),
-                 gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
-                 gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
-             ],
-             outputs=gr.File(label="Project File (.aiproj)"),
-             title="Save Everything Together",
-             description="Save your session, effects, and settings in one file to reuse later."
-         )
-
-         gr.Interface(
-             fn=load_project,
-             inputs=gr.File(label="Upload .aiproj File"),
-             outputs=[
-                 gr.Dropdown(choices=preset_names, label="Loaded Preset"),
-                 gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
-             ],
-             title="Resume Last Project",
-             description="Load your saved session"
-         )
-
  demo.launch()