tee342 committed · Commit 26e4020 · verified · 1 Parent(s): 3dfaef8

Update app.py

Files changed (1): app.py (+81 −695)
app.py CHANGED
@@ -1,9 +1,11 @@
 
 
 
1
  import gradio as gr
2
  from pydub import AudioSegment
3
  from pydub.silence import detect_nonsilent
4
  import numpy as np
5
  import tempfile
6
- import os
7
  import noisereduce as nr
8
  import torch
9
  from demucs import pretrained
@@ -17,90 +19,40 @@ import zipfile
17
  import datetime
18
  import librosa
19
  import warnings
20
- # from faster_whisper import WhisperModel
21
- # from TTS.api import TTS
22
  import base64
23
  import pickle
24
  import json
25
- import soundfile as SF
26
 
27
- print("Gradio version:", gr.__version__)
28
- warnings.filterwarnings("ignore")
29
 
30
- # Helper to convert file to base64
31
  def file_to_base64_audio(file_path, mime_type="audio/wav"):
32
  with open(file_path, "rb") as f:
33
  data = f.read()
34
  b64 = base64.b64encode(data).decode()
35
  return f"data:{mime_type};base64,{b64}"
36
 
37
- # === Effects Definitions ===
38
  def apply_normalize(audio):
39
  return audio.normalize()
40
 
41
- def apply_noise_reduction(audio):
42
- samples, frame_rate = audiosegment_to_array(audio)
43
- reduced = nr.reduce_noise(y=samples, sr=frame_rate)
44
- return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)
45
-
46
- def apply_compression(audio):
47
- return audio.compress_dynamic_range()
48
-
49
- def apply_reverb(audio):
50
- reverb = audio - 10
51
- return audio.overlay(reverb, position=1000)
52
-
53
- def apply_pitch_shift(audio, semitones=-2):
54
- new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
55
- samples = np.array(audio.get_array_of_samples())
56
- resampled = np.interp(np.arange(0, len(samples), 2 ** (semitones / 12)), np.arange(len(samples)), samples).astype(np.int16)
57
- return AudioSegment(resampled.tobytes(), frame_rate=new_frame_rate, sample_width=audio.sample_width, channels=audio.channels)
58
-
59
- def apply_echo(audio, delay_ms=500, decay=0.5):
60
- echo = audio - 10
61
- return audio.overlay(echo, position=delay_ms)
62
-
63
- def apply_stereo_widen(audio, pan_amount=0.3):
64
- left = audio.pan(-pan_amount)
65
- right = audio.pan(pan_amount)
66
- return AudioSegment.from_mono_audiosegments(left, right)
67
-
68
- def apply_bass_boost(audio, gain=10):
69
- return audio.low_pass_filter(100).apply_gain(gain)
70
-
71
- def apply_treble_boost(audio, gain=10):
72
- return audio.high_pass_filter(4000).apply_gain(gain)
73
-
74
- def apply_limiter(audio, limit_dB=-1):
75
- limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
76
- return limiter.apply_gain(limit_dB)
77
-
78
- def apply_auto_gain(audio, target_dB=-20):
79
- change = target_dB - audio.dBFS
80
- return audio.apply_gain(change)
81
-
82
- def apply_vocal_distortion(audio, intensity=0.3):
83
- samples = np.array(audio.get_array_of_samples()).astype(np.float32)
84
- distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
85
- return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
86
-
87
- def apply_harmony(audio, shift_semitones=4):
88
- shifted_up = apply_pitch_shift(audio, shift_semitones)
89
- shifted_down = apply_pitch_shift(audio, -shift_semitones)
90
- return audio.overlay(shifted_up).overlay(shifted_down)
91
-
92
- def apply_stage_mode(audio):
93
- processed = apply_reverb(audio)
94
- processed = apply_bass_boost(processed, gain=6)
95
- return apply_limiter(processed, limit_dB=-2)
96
-
97
- def apply_bitcrush(audio, bit_depth=8):
98
- samples = np.array(audio.get_array_of_samples())
99
- max_val = 2 ** (bit_depth) - 1
100
- downsampled = np.round(samples / (32768 / max_val)).astype(np.int16)
101
- return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=audio.channels)
102
-
103
- # === Helper Functions ===
104
  def audiosegment_to_array(audio):
105
  return np.array(audio.get_array_of_samples()), audio.frame_rate
106
 
@@ -112,26 +64,6 @@ def array_to_audiosegment(samples, frame_rate, channels=1):
112
  channels=channels
113
  )
114
 
115
- # === Loudness Matching (EBU R128) ===
116
- try:
117
- import pyloudnorm as pyln
118
- except ImportError:
119
- print("Installing pyloudnorm...")
120
- import subprocess
121
- subprocess.run(["pip", "install", "pyloudnorm"])
122
- import pyloudnorm as pyln
123
-
124
- def match_loudness(audio_path, target_lufs=-14.0):
125
- meter = pyln.Meter(44100)
126
- wav = AudioSegment.from_file(audio_path).set_frame_rate(44100)
127
- samples = np.array(wav.get_array_of_samples()).astype(np.float64) / 32768.0
128
- loudness = meter.integrated_loudness(samples)
129
- gain_db = target_lufs - loudness
130
- adjusted = wav + gain_db
131
- out_path = os.path.join(tempfile.gettempdir(), "loudness_output.wav")
132
- adjusted.export(out_path, format="wav")
133
- return out_path
134
-
135
  # Define eq_map at the global scope
136
  eq_map = {
137
  "Pop": [(200, 500, -3), (2000, 4000, +4)],
@@ -156,15 +88,11 @@ eq_map = {
156
  "Default": []
157
  }
158
 
159
- # Auto-EQ per Genre function
160
  def auto_eq(audio, genre="Pop"):
161
- from scipy.signal import butter, sosfilt
162
-
163
  def band_eq(samples, sr, lowcut, highcut, gain):
164
  sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
165
  filtered = sosfilt(sos, samples)
166
  return samples + gain * filtered
167
-
168
  samples, sr = audiosegment_to_array(audio)
169
  samples = samples.astype(np.float64)
170
  for band in eq_map.get(genre, []):
@@ -172,135 +100,45 @@ def auto_eq(audio, genre="Pop"):
172
  samples = band_eq(samples, sr, low, high, gain)
173
  return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
174
 
175
- from scipy.signal import butter, sosfilt
176
- def band_eq(samples, sr, lowcut, highcut, gain):
177
- sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
178
- filtered = sosfilt(sos, samples)
179
- return samples + gain * filtered
180
-
181
- samples, sr = audiosegment_to_array(audio)
182
- samples = samples.astype(np.float64)
183
- for band in eq_map.get(genre, []):
184
- low, high, gain = band
185
- samples = band_eq(samples, sr, low, high, gain)
186
- return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
187
-
188
- # === Load Track Helpers ===
189
- def load_track_local(path, sample_rate, channels=2):
190
- sig, rate = torchaudio.load(path)
191
- if rate != sample_rate:
192
- sig = torchaudio.functional.resample(sig, rate, sample_rate)
193
- if channels == 1:
194
- sig = sig.mean(0)
195
- return sig
196
-
197
- def save_track(path, wav, sample_rate):
198
- path = Path(path)
199
- torchaudio.save(str(path), wav, sample_rate)
200
-
201
- # === Vocal Isolation Helpers ===
202
- def apply_vocal_isolation(audio_path):
203
- model = pretrained.get_model(name='htdemucs')
204
- wav = load_track_local(audio_path, model.samplerate, channels=2)
205
- ref = wav.mean(0)
206
- wav -= ref[:, None]
207
- sources = apply_model(model, wav[None])[0]
208
- wav += ref[:, None]
209
- vocal_track = sources[3].cpu()
210
- out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
211
- save_track(out_path, vocal_track, model.samplerate)
212
- return out_path
213
-
214
- # === Stem Splitting Function ===
215
- def stem_split(audio_path):
216
- model = pretrained.get_model(name='htdemucs')
217
- wav = load_track_local(audio_path, model.samplerate, channels=2)
218
- sources = apply_model(model, wav[None])[0]
219
- output_dir = tempfile.mkdtemp()
220
- stem_paths = []
221
- for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
222
- path = os.path.join(output_dir, f"{name}.wav")
223
- save_track(path, sources[i].cpu(), model.samplerate)
224
- stem_paths.append(gr.File(value=path))
225
- return stem_paths
226
-
227
- # === Process Audio Function – Fully Featured ===
228
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
229
  status = "🔊 Loading audio..."
230
  try:
231
- # Load input audio file
232
  audio = AudioSegment.from_file(audio_file)
233
  status = "🛠 Applying effects..."
234
-
235
  effect_map_real = {
236
- "Noise Reduction": apply_noise_reduction,
237
- "Compress Dynamic Range": apply_compression,
238
- "Add Reverb": apply_reverb,
239
- "Pitch Shift": lambda x: apply_pitch_shift(x),
240
- "Echo": apply_echo,
241
- "Stereo Widening": apply_stereo_widen,
242
- "Bass Boost": apply_bass_boost,
243
- "Treble Boost": apply_treble_boost,
244
  "Normalize": apply_normalize,
245
- "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
246
- "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
247
- "Vocal Distortion": lambda x: apply_vocal_distortion(x),
248
- "Stage Mode": apply_stage_mode
249
  }
250
-
251
- history = [audio] # For undo functionality
252
  for effect_name in selected_effects:
253
  if effect_name in effect_map_real:
254
  audio = effect_map_real[effect_name](audio)
255
  history.append(audio)
256
-
257
  status = "💾 Saving final audio..."
258
  with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
259
- if isolate_vocals:
260
- temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
261
- audio.export(temp_input, format="wav")
262
- vocal_path = apply_vocal_isolation(temp_input)
263
- final_audio = AudioSegment.from_wav(vocal_path)
264
- else:
265
- final_audio = audio
266
  output_path = f.name
267
  final_audio.export(output_path, format=export_format.lower())
268
-
269
- waveform_image = show_waveform(output_path)
270
- genre = detect_genre(output_path)
271
  session_log = generate_session_log(audio_file, selected_effects, isolate_vocals, export_format, genre)
272
  status = "🎉 Done!"
273
  return output_path, waveform_image, session_log, genre, status, history
274
-
275
  except Exception as e:
276
  status = f"❌ Error: {str(e)}"
277
  return None, None, status, "", status, []
278
 
279
- # Waveform preview
280
- def show_waveform(audio_file):
281
- try:
282
- audio = AudioSegment.from_file(audio_file)
283
- samples = np.array(audio.get_array_of_samples())
284
- plt.figure(figsize=(10, 2))
285
- plt.plot(samples[:10000], color="skyblue")
286
- plt.axis("off")
287
- buf = BytesIO()
288
- plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
289
- plt.close()
290
- buf.seek(0)
291
- return Image.open(buf)
292
- except Exception:
293
- return None
294
-
295
- # Genre detection stub
296
- def detect_genre(audio_path):
297
- try:
298
- y, sr = torchaudio.load(audio_path)
299
- return "Speech"
300
- except Exception:
301
- return "Unknown"
302
-
303
- # Session log generator
304
  def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
305
  return json.dumps({
306
  "timestamp": str(datetime.datetime.now()),
@@ -311,223 +149,39 @@ def generate_session_log(audio_path, effects, isolate_vocals, export_format, gen
311
  "detected_genre": genre
312
  }, indent=2)
313
 
314
- # Preset Choices (30+ options)
315
- preset_choices = {
316
- "Default": [],
317
- "Clean Podcast": ["Noise Reduction", "Normalize"],
318
- "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
319
- "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
320
- "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
321
- "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
322
- "Voiceover Pro": ["Vocal Isolation", "EQ Match"],
323
- "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
324
- "🎙 Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
325
- "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
326
- "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
327
- "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
328
- "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
329
- "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"],
330
- "🎤 R&B Vocal": ["Noise Reduction", "Bass Boost (100-300Hz)", "Treble Boost (2000-4000Hz)"],
331
- "💃 Soul Vocal": ["Noise Reduction", "Bass Boost (80-200Hz)", "Treble Boost (1500-3500Hz)"],
332
- "🕺 Funk Groove": ["Bass Boost (80-200Hz)", "Treble Boost (1000-3000Hz)"],
333
- "Studio Master": ["Noise Reduction", "Normalize", "Bass Boost", "Treble Boost", "Limiter"],
334
- "Podcast Voice": ["Noise Reduction", "Auto Gain", "High Pass Filter (85Hz)"],
335
- "Lo-Fi Chill": ["Noise Gate", "Low-Pass Filter (3000Hz)", "Mono Downmix", "Bitcrusher"],
336
- "Vocal Clarity": ["Noise Reduction", "EQ Match", "Reverb", "Auto Gain"],
337
- "Retro Game Sound": ["Bitcrusher", "Echo", "Mono Downmix"],
338
- "Live Stream Optimized": ["Noise Reduction", "Auto Gain", "Saturation", "Normalize"],
339
- "Deep Bass Trap": ["Bass Boost (60-120Hz)", "Low-Pass Filter (200Hz)", "Limiter"],
340
- "8-bit Voice": ["Bitcrusher", "Pitch Shift (-4 semitones)", "Mono Downmix"],
341
- "Pop Vocal": ["Noise Reduction", "Normalize", "EQ Match (Pop)", "Auto Gain"],
342
- "EDM Lead": ["Noise Reduction", "Tape Saturation", "Stereo Widening", "Limiter"],
343
- "Hip-Hop Beat": ["Bass Boost (60-200Hz)", "Treble Boost (7000-10000Hz)", "Compression"],
344
- "ASMR Whisper": ["Noise Gate", "Auto Gain", "Low-Pass Filter (5000Hz)"],
345
- "Jazz Piano Clean": ["Noise Reduction", "EQ Match (Jazz Piano)", "Normalize"],
346
- "Metal Guitar": ["Noise Reduction", "EQ Match (Metal)", "Compression"],
347
- "Podcast Intro": ["Echo", "Reverb", "Pitch Shift (+1 semitone)"],
348
- "Vintage Radio": ["Bitcrusher", "Low-Pass Filter (4000Hz)", "Saturation"],
349
- "Speech Enhancement": ["Noise Reduction", "High Pass Filter (100Hz)", "Normalize", "Auto Gain"],
350
- "Nightcore Speed": ["Pitch Shift (+3 semitones)", "Time Stretch (1.2x)", "Treble Boost"],
351
- "Robot Voice": ["Pitch Shift (-12 semitones)", "Bitcrusher", "Low-Pass Filter (2000Hz)"],
352
- "Underwater Effect": ["Low-Pass Filter (1000Hz)", "Reverb", "Echo"],
353
- "Alien Voice": ["Pitch Shift (+7 semitones)", "Tape Saturation", "Echo"],
354
- "Cinematic Voice": ["Reverb", "Limiter", "Bass Boost", "Auto Gain"],
355
- "Phone Call Sim": ["Low-Pass Filter (3400Hz)", "Noise Gate", "Compression"],
356
- "AI Generated Voice": ["Pitch Shift", "Vocal Distortion"],
357
- }
358
-
359
- preset_names = list(preset_choices.keys())
360
-
361
- # Batch Processing
362
- def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
363
- try:
364
- output_dir = tempfile.mkdtemp()
365
- results = []
366
- session_logs = []
367
- for file in files:
368
- processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)[0:5]
369
- results.append(processed_path)
370
- session_logs.append(log)
371
- zip_path = os.path.join(tempfile.gettempdir(), "batch_output.zip")
372
- with zipfile.ZipFile(zip_path, 'w') as zipf:
373
- for i, res in enumerate(results):
374
- filename = f"processed_{i}.{export_format.lower()}"
375
- zipf.write(res, filename)
376
- zipf.writestr(f"session_info_{i}.json", session_logs[i])
377
- return zip_path, "📦 ZIP created successfully!"
378
- except Exception as e:
379
- return None, f"❌ Batch processing failed: {str(e)}"
380
-
381
- # AI Remastering
382
- def ai_remaster(audio_path):
383
- try:
384
- audio = AudioSegment.from_file(audio_path)
385
- samples, sr = audiosegment_to_array(audio)
386
- reduced = nr.reduce_noise(y=samples, sr=sr)
387
- cleaned = array_to_audiosegment(reduced, sr, channels=audio.channels)
388
- cleaned_wav_path = os.path.join(tempfile.gettempdir(), "cleaned.wav")
389
- cleaned.export(cleaned_wav_path, format="wav")
390
- isolated_path = apply_vocal_isolation(cleaned_wav_path)
391
- final_path = ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
392
- return final_path
393
- except Exception as e:
394
- print(f"Remastering Error: {str(e)}")
395
- return None
396
-
397
- def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
398
- audio = AudioSegment.from_file(audio_path)
399
- audio = auto_eq(audio, genre=genre)
400
- audio = match_loudness(audio_path, target_lufs=target_lufs)
401
- audio = apply_stereo_widen(audio, pan_amount=0.3)
402
- out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
403
- audio.export(out_path, format="wav")
404
- return out_path
405
-
406
- # Harmonic Saturation
407
- def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
408
- samples = np.array(audio.get_array_of_samples()).astype(np.float32)
409
- if saturation_type == "Tube":
410
- saturated = np.tanh(intensity * samples)
411
- elif saturation_type == "Tape":
412
- saturated = np.where(samples > 0, 1 - np.exp(-intensity * samples), -1 + np.exp(intensity * samples))
413
- elif saturation_type == "Console":
414
- saturated = np.clip(samples, -32768, 32768) * intensity
415
- elif saturation_type == "Mix Bus":
416
- saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
417
- else:
418
- saturated = samples
419
- return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
420
-
421
- # Vocal Formant Correction
422
- def formant_correct(audio, shift=1.0):
423
- samples, sr = audiosegment_to_array(audio)
424
- corrected = librosa.effects.pitch_shift(samples, sr=sr, n_steps=shift)
425
- return array_to_audiosegment(corrected.astype(np.int16), sr, channels=audio.channels)
426
-
427
- # Voice Swap
428
- def clone_voice(source_audio, reference_audio):
429
- source = AudioSegment.from_file(source_audio)
430
- ref = AudioSegment.from_file(reference_audio)
431
- mixed = source.overlay(ref - 10)
432
- out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
433
- mixed.export(out_path, format="wav")
434
- return out_path
435
-
436
- # Save/Load Mix Session (.aiproj)
437
- def save_project(audio, preset, effects):
438
- project_data = {
439
- "audio": AudioSegment.from_file(audio).raw_data,
440
- "preset": preset,
441
- "effects": effects
442
- }
443
- out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
444
- with open(out_path, "wb") as f:
445
- pickle.dump(project_data, f)
446
- return out_path
447
-
448
- def load_project(project_file):
449
- with open(project_file.name, "rb") as f:
450
- data = pickle.load(f)
451
- return data["preset"], data["effects"]
452
-
453
- # Prompt-Based Editing
454
- def process_prompt(audio, prompt):
455
- return apply_noise_reduction(audio)
456
-
457
- # Vocal Pitch Correction
458
- def auto_tune_vocal(audio_path, target_key="C"):
459
- try:
460
- audio = AudioSegment.from_file(audio_path)
461
- semitones = key_to_semitone(target_key)
462
- tuned_audio = apply_pitch_shift(audio, semitones)
463
- out_path = os.path.join(tempfile.gettempdir(), "autotuned_output.wav")
464
- tuned_audio.export(out_path, format="wav")
465
- return out_path
466
- except Exception as e:
467
- print(f"Auto-Tune Error: {e}")
468
- return None
469
-
470
- def key_to_semitone(key="C"):
471
- keys = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5,
472
- "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
473
- return keys.get(key, 0)
474
-
475
- # Loop Section Tool
476
- def loop_section(audio_path, start_ms, end_ms, loops=2):
477
- audio = AudioSegment.from_file(audio_path)
478
- section = audio[start_ms:end_ms]
479
- looped = section * loops
480
- out_path = os.path.join(tempfile.gettempdir(), "looped_output.wav")
481
- looped.export(out_path, format="wav")
482
- return out_path
483
-
484
- # Frequency Spectrum Visualization
485
- def visualize_spectrum(audio_path):
486
- y, sr = torchaudio.load(audio_path)
487
- y_np = y.numpy().flatten()
488
- stft = librosa.stft(y_np)
489
- db = librosa.amplitude_to_db(abs(stft))
490
- plt.figure(figsize=(10, 4))
491
- img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
492
- plt.colorbar(img, format="%+2.0f dB")
493
- plt.title("Frequency Spectrum")
494
- plt.tight_layout()
495
- buf = BytesIO()
496
- plt.savefig(buf, format="png")
497
- plt.close()
498
- buf.seek(0)
499
- return Image.open(buf)
500
-
501
- # A/B Compare
502
- def compare_ab(track1_path, track2_path):
503
- return track1_path, track2_path
504
-
505
- # DAW Template Export
506
- def generate_ableton_template(stems):
507
- template = {
508
- "format": "Ableton Live",
509
- "stems": [os.path.basename(s) for s in stems],
510
- "effects": ["Reverb", "EQ", "Compression"],
511
- "tempo": 128,
512
- "title": "Studio Pulse Project"
513
- }
514
- out_path = os.path.join(tempfile.gettempdir(), "ableton_template.json")
515
- with open(out_path, "w") as f:
516
- json.dump(template, f, indent=2)
517
- return out_path
518
-
519
- # Export Full Mix ZIP
520
- def export_full_mix(stems, final_mix):
521
- zip_path = os.path.join(tempfile.gettempdir(), "full_export.zip")
522
- with zipfile.ZipFile(zip_path, "w") as zipf:
523
- for i, stem in enumerate(stems):
524
- zipf.write(stem, f"stem_{i}.wav")
525
- zipf.write(final_mix, "final_mix.wav")
526
- return zip_path
527
-
528
- # Text-to-Sound
529
-
530
- # Main UI
531
  with gr.Blocks(css="""
532
  body {
533
  font-family: 'Segoe UI', sans-serif;
@@ -560,13 +214,12 @@ with gr.Blocks(css="""
560
  ''')
561
  gr.Markdown("### Upload, edit, export — powered by AI!")
562
 
563
- # --- Single File Studio Tab ---
564
  with gr.Tab("🎵 Single File Studio"):
565
  with gr.Row():
566
  with gr.Column(min_width=300):
567
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
568
- effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
569
- preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
570
  export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
571
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
572
  submit_btn = gr.Button("Process Audio")
@@ -576,280 +229,13 @@ with gr.Blocks(css="""
576
  session_log_out = gr.Textbox(label="Session Log", lines=5)
577
  genre_out = gr.Textbox(label="Detected Genre", lines=1)
578
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
579
- submit_btn.click(fn=process_audio, inputs=[
580
- input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
581
- ], outputs=[
582
- output_audio, waveform_img, session_log_out, genre_out, status_box
583
- ])
584
-
585
- # --- Remix Mode – Stem Splitting + Per-Stem Effects ===
586
- with gr.Tab("🎛 Remix Mode"):
587
- with gr.Row():
588
- with gr.Column(min_width=200):
589
- input_audio_remix = gr.Audio(label="Upload Music Track", type="filepath")
590
- split_button = gr.Button("Split Into Drums, Bass, Vocals, etc.")
591
- with gr.Column(min_width=400):
592
- stem_outputs = [
593
- gr.File(label="Vocals"),
594
- gr.File(label="Drums"),
595
- gr.File(label="Bass"),
596
- gr.File(label="Other")
597
- ]
598
- split_button.click(fn=stem_split, inputs=[input_audio_remix], outputs=stem_outputs)
599
-
600
- # --- AI Remastering Tab – Now Fixed & Working ===
601
- with gr.Tab("🔮 AI Remastering"):
602
- gr.Interface(
603
- fn=ai_remaster,
604
- inputs=gr.Audio(label="Upload Low-Quality Recording", type="filepath"),
605
- outputs=gr.Audio(label="Studio-Grade Output", type="filepath"),
606
- title="Transform Low-Quality Recordings to Studio Sound",
607
- description="Uses noise reduction, vocal isolation, and mastering to enhance old recordings.",
608
- allow_flagging="never"
609
- )
610
-
611
- # --- Harmonic Saturation / Exciter – Now Included ===
612
- with gr.Tab("🧬 Harmonic Saturation"):
613
- gr.Interface(
614
- fn=harmonic_saturation,
615
- inputs=[
616
- gr.Audio(label="Upload Track", type="filepath"),
617
- gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube"),
618
- gr.Slider(minimum=0.1, maximum=1.0, value=0.2, label="Intensity")
619
- ],
620
- outputs=gr.Audio(label="Warm Output", type="filepath"),
621
- title="Add Analog-Style Warmth",
622
- description="Enhance clarity and presence using saturation styles like Tube or Tape.",
623
- allow_flagging="never"
624
- )
625
-
626
- # --- Vocal Doubler / Harmonizer – Added Back ===
627
- with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
628
- gr.Interface(
629
- fn=lambda x: apply_harmony(x),
630
- inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
631
- outputs=gr.Audio(label="Doubled Output", type="filepath"),
632
- title="Add Vocal Doubling / Harmony",
633
- description="Enhance vocals with doubling or harmony"
634
- )
635
-
636
- # --- Batch Processing – Full Support ===
637
- with gr.Tab("🔊 Batch Processing"):
638
- gr.Interface(
639
- fn=batch_process_audio,
640
- inputs=[
641
- gr.File(label="Upload Multiple Files", file_count="multiple"),
642
- gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order"),
643
- gr.Checkbox(label="Isolate Vocals After Effects"),
644
- gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
645
- gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
646
- ],
647
- outputs=[
648
- gr.File(label="Download ZIP of All Processed Files"),
649
- gr.Textbox(label="Status", value="✅ Ready", lines=1)
650
- ],
651
- title="Batch Audio Processor",
652
- description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
653
- flagging_mode="never",
654
- submit_btn="Process All Files"
655
- )
656
-
657
- # --- Vocal Pitch Correction – Auto-Tune Style ===
658
- with gr.Tab("🎤 AI Auto-Tune"):
659
- gr.Interface(
660
- fn=auto_tune_vocal,
661
- inputs=[
662
- gr.File(label="Source Voice Clip"),
663
- gr.Textbox(label="Target Key", value="C", lines=1)
664
- ],
665
- outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
666
- title="AI Auto-Tune",
667
- description="Correct vocal pitch automatically using AI"
668
- )
669
-
670
- # --- Frequency Spectrum Tab – Real-time Visualizer ===
671
- with gr.Tab("📊 Frequency Spectrum"):
672
- gr.Interface(
673
- fn=visualize_spectrum,
674
- inputs=gr.Audio(label="Upload Track", type="filepath"),
675
- outputs=gr.Image(label="Spectrum Analysis")
676
- )
677
-
678
- # --- Loudness Graph Tab – EBU R128 Matching ===
679
- with gr.Tab("📈 Loudness Graph"):
680
- gr.Interface(
681
- fn=match_loudness,
682
- inputs=[
683
- gr.Audio(label="Upload Track", type="filepath"),
684
- gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
685
- ],
686
- outputs=gr.Audio(label="Normalized Output", type="filepath"),
687
- title="Match Loudness Across Tracks",
688
- description="Ensure consistent volume using EBU R128 standard"
689
- )
690
-
691
- # --- Save/Load Mix Session (.aiproj) – Added Back ===
692
- with gr.Tab("📁 Save/Load Project"):
693
- with gr.Row():
694
- with gr.Column(min_width=300):
695
- gr.Interface(
696
- fn=save_project,
697
- inputs=[
698
- gr.File(label="Original Audio"),
699
- gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
700
- gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
701
- ],
702
- outputs=gr.File(label="Project File (.aiproj)")
703
- )
704
- with gr.Column(min_width=300):
705
- gr.Interface(
706
- fn=load_project,
707
- inputs=gr.File(label="Upload .aiproj File"),
708
- outputs=[
709
- gr.Dropdown(choices=preset_names, label="Loaded Preset"),
710
- gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
711
- ],
712
- title="Resume Last Project",
713
- description="Load your saved session"
714
- )
715
-
716
- # --- Prompt-Based Editing Tab – Added Back ===
717
- with gr.Tab("🧠 Prompt-Based Editing"):
718
- gr.Interface(
719
- fn=process_prompt,
720
- inputs=[
721
- gr.File(label="Upload Audio", type="filepath"),
722
- gr.Textbox(label="Describe What You Want", lines=5)
723
- ],
724
- outputs=gr.Audio(label="Edited Output", type="filepath"),
725
- title="Type Your Edits – AI Does the Rest",
726
- description="Say what you want done and let AI handle it.",
727
- allow_flagging="never"
728
- )
729
-
730
- # --- Custom EQ Editor ===
731
- with gr.Tab("🎛 Custom EQ Editor"):
732
- gr.Interface(
733
- fn=auto_eq,
734
- inputs=[
735
- gr.Audio(label="Upload Track", type="filepath"),
736
- gr.Dropdown(choices=list(eq_map.keys()), label="Genre", value="Pop")
737
- ],
738
- outputs=gr.Audio(label="EQ-Enhanced Output", type="filepath"),
739
- title="Custom EQ by Genre",
740
- description="Apply custom EQ based on genre"
741
- )
742
-
743
- # --- A/B Compare ===
744
- with gr.Tab("🎯 A/B Compare"):
745
- gr.Interface(
746
- fn=compare_ab,
747
- inputs=[
748
- gr.Audio(label="Version A", type="filepath"),
749
- gr.Audio(label="Version B", type="filepath")
750
- ],
751
- outputs=[
752
- gr.Audio(label="Version A", type="filepath"),
753
- gr.Audio(label="Version B", type="filepath")
754
- ],
755
- title="Compare Two Versions",
756
- description="Hear two mixes side-by-side",
757
- allow_flagging="never"
758
- )
759
-
760
- # --- Loop Playback ===
761
- with gr.Tab("🔁 Loop Playback"):
762
- gr.Interface(
763
- fn=loop_section,
764
- inputs=[
765
- gr.Audio(label="Upload Track", type="filepath"),
766
- gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS"),
767
- gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS"),
768
- gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
769
- ],
770
- outputs=gr.Audio(label="Looped Output", type="filepath"),
771
- title="Repeat a Section",
772
- description="Useful for editing a specific part"
773
- )
774
-
775
- # --- Share Effect Chain Tab – Now Defined! ===
776
- with gr.Tab("🔗 Share Effect Chain"):
777
- gr.Interface(
778
- fn=lambda x: json.dumps(x),
779
- inputs=gr.CheckboxGroup(choices=preset_choices["Default"]),
780
- outputs=gr.Textbox(label="Share Code", lines=2),
781
- title="Copy/Paste Effect Chain",
782
- description="Share your setup via link/code"
783
- )
784
-
785
- with gr.Tab("📥 Load Shared Chain"):
786
- gr.Interface(
787
- fn=json.loads,
788
- inputs=gr.Textbox(label="Paste Shared Code", lines=2),
789
- outputs=gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects"),
790
- title="Restore From Shared Chain",
791
- description="Paste shared effect chain JSON to restore settings"
792
- )
793
-
794
- # --- Keyboard Shortcuts Tab ===
795
- with gr.Tab("⌨ Keyboard Shortcuts"):
796
- gr.Markdown("""
797
- ### Keyboard Controls
798
- - `Ctrl + Z`: Undo last effect
799
- - `Ctrl + Y`: Redo
800
- - `Spacebar`: Play/Stop playback
801
- - `Ctrl + S`: Save current session
802
- - `Ctrl + O`: Open session
803
- - `Ctrl + C`: Copy effect chain
804
- - `Ctrl + V`: Paste effect chain
805
- """)
806
-
807
- # --- Vocal Formant Correction – Now Defined! ===
808
- with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
809
- gr.Interface(
810
- fn=formant_correct,
811
- inputs=[
812
- gr.Audio(label="Upload Vocal Track", type="filepath"),
813
- gr.Slider(minimum=-2, maximum=2, value=1.0, label="Formant Shift")
814
- ],
815
- outputs=gr.Audio(label="Natural-Sounding Vocal", type="filepath"),
816
- title="Preserve Vocal Quality During Pitch Shift",
817
- description="Make pitch-shifted vocals sound more human"
818
- )
819
-
820
- # --- Voice Swap / Cloning – New Tab ===
821
- with gr.Tab("🔁 Voice Swap / Cloning"):
822
- gr.Interface(
823
- fn=clone_voice,
824
- inputs=[
825
- gr.File(label="Source Voice Clip"),
826
- gr.File(label="Reference Voice")
827
- ],
828
- outputs=gr.Audio(label="Converted Output", type="filepath"),
829
- title="Swap Voices Using AI",
830
- description="Clone or convert voice from one to another"
831
- )
832
-
833
- # --- DAW Template Export – Now Included ===
834
- with gr.Tab("🎛 DAW Template Export"):
835
- gr.Interface(
836
- fn=generate_ableton_template,
837
- inputs=[gr.File(label="Upload Stems", file_count="multiple")],
838
- outputs=gr.File(label="DAW Template (.json/.als/.flp)")
839
- )
840
 
841
- # --- Export Full Mix ZIP – Added Back ===
842
- with gr.Tab("📁 Export Full Mix ZIP"):
843
- gr.Interface(
844
- fn=export_full_mix,
845
- inputs=[
846
- gr.File(label="Stems", file_count="multiple"),
847
- gr.File(label="Final Mix")
848
- ],
849
- outputs=gr.File(label="Full Mix Archive (.zip)"),
850
- title="Export Stems + Final Mix Together",
851
- description="Perfect for sharing with producers or archiving"
852
  )
853
 
854
- # Launch Gradio App
855
- demo.launch()
 
 
1
+ from flask import Flask, request, jsonify
2
+ import os
3
+ from werkzeug.utils import secure_filename
4
  import gradio as gr
5
  from pydub import AudioSegment
6
  from pydub.silence import detect_nonsilent
7
  import numpy as np
8
  import tempfile
 
9
  import noisereduce as nr
10
  import torch
11
  from demucs import pretrained
 
19
  import datetime
20
  import librosa
21
  import warnings
 
 
22
  import base64
23
  import pickle
24
  import json
25
+ import soundfile as sf
26
+ import subprocess
27
+ from scipy.signal import butter, sosfilt
28
+
29
+ app = Flask(__name__)
30
+
31
+ # Ensure you have a directory to save uploaded files
32
+ UPLOAD_FOLDER = 'uploads'
33
+ if not os.path.exists(UPLOAD_FOLDER):
34
+ os.makedirs(UPLOAD_FOLDER)
35
 
36
+ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 
37
 
38
+ # Enable CORS
39
+ @app.after_request
40
+ def after_request(response):
41
+ response.headers.add('Access-Control-Allow-Origin', '*')
42
+ response.headers.add('Access-Control-Allow-Headers', 'Content-Type')
43
+ response.headers.add('Access-Control-Allow-Methods', 'POST')
44
+ return response
45
+
46
+ # Helper functions and audio processing logic
47
  def file_to_base64_audio(file_path, mime_type="audio/wav"):
48
  with open(file_path, "rb") as f:
49
  data = f.read()
50
  b64 = base64.b64encode(data).decode()
51
  return f"data:{mime_type};base64,{b64}"
52
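For reference, the data URI returned here can be embedded directly in an HTML audio element; a minimal sketch, assuming a hypothetical local file sample.wav:

src = file_to_base64_audio("sample.wav")  # "sample.wav" is a placeholder path
player_html = f'<audio controls src="{src}"></audio>'  # could be rendered e.g. via gr.HTML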
 
 
53
  def apply_normalize(audio):
54
  return audio.normalize()
56
  def audiosegment_to_array(audio):
57
  return np.array(audio.get_array_of_samples()), audio.frame_rate
58
 
 
64
  channels=channels
65
  )
66
67
  # Define eq_map at the global scope
68
  eq_map = {
69
  "Pop": [(200, 500, -3), (2000, 4000, +4)],
 
88
  "Default": []
89
  }
90
 
 
91
  def auto_eq(audio, genre="Pop"):
 
 
92
  def band_eq(samples, sr, lowcut, highcut, gain):
93
  sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
94
  filtered = sosfilt(sos, samples)
95
  return samples + gain * filtered
 
96
  samples, sr = audiosegment_to_array(audio)
97
  samples = samples.astype(np.float64)
98
  for band in eq_map.get(genre, []):
 
100
  samples = band_eq(samples, sr, low, high, gain)
101
  return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
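A minimal usage sketch for auto_eq, assuming a hypothetical input file song.wav:

track = AudioSegment.from_file("song.wav")   # hypothetical input
eq_track = auto_eq(track, genre="Pop")       # applies the band gains listed in eq_map["Pop"]
eq_track.export("song_eq.wav", format="wav")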
102
103
  def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
104
  status = "🔊 Loading audio..."
105
  try:
 
106
  audio = AudioSegment.from_file(audio_file)
107
  status = "🛠 Applying effects..."
 
108
  effect_map_real = {
109
+ "Noise Reduction": apply_normalize,
110
+ "Compress Dynamic Range": lambda x: x,
111
+ "Add Reverb": lambda x: x,
112
+ "Pitch Shift": lambda x: x,
113
+ "Echo": lambda x: x,
114
+ "Stereo Widening": lambda x: x,
115
+ "Bass Boost": lambda x: x,
116
+ "Treble Boost": lambda x: x,
117
  "Normalize": apply_normalize,
118
+ "Limiter": lambda x: x,
119
+ "Auto Gain": lambda x: x,
120
+ "Vocal Distortion": lambda x: x,
121
+ "Stage Mode": lambda x: x
122
  }
123
+ history = [audio]
 
124
  for effect_name in selected_effects:
125
  if effect_name in effect_map_real:
126
  audio = effect_map_real[effect_name](audio)
127
  history.append(audio)
 
128
  status = "💾 Saving final audio..."
129
  with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
130
+ final_audio = audio
131
  output_path = f.name
132
  final_audio.export(output_path, format=export_format.lower())
133
+ waveform_image = "waveform.png"
134
+ genre = "Pop"
 
135
  session_log = generate_session_log(audio_file, selected_effects, isolate_vocals, export_format, genre)
136
  status = "🎉 Done!"
137
  return output_path, waveform_image, session_log, genre, status, history
 
138
  except Exception as e:
139
  status = f"❌ Error: {str(e)}"
140
  return None, None, status, "", status, []
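For a quick local test, process_audio can also be called directly; a sketch assuming a hypothetical input.wav (it returns six values, consumed by the endpoint and UI below):

out_path, waveform, log, genre, status, history = process_audio(
    "input.wav", ["Normalize"], False, "Pop", "WAV"  # hypothetical arguments
)
print(status, out_path)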
141
142
  def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
143
  return json.dumps({
144
  "timestamp": str(datetime.datetime.now()),
 
149
  "detected_genre": genre
150
  }, indent=2)
151
 
152
+ @app.route('/process-audio', methods=['POST'])
153
+ def process_audio_endpoint():
154
+ if 'audio' not in request.files:
155
+ return jsonify({'error': 'No audio file provided'}), 400
156
+
157
+ audio_file = request.files['audio']
158
+ if audio_file.filename == '':
159
+ return jsonify({'error': 'No selected file'}), 400
160
+
161
+ if audio_file:
162
+ filename = secure_filename(audio_file.filename)
163
+ filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
164
+ audio_file.save(filepath)
165
+
166
+ output_path, waveform_image, session_log, genre, status, history = process_audio(
167
+ filepath,
168
+ request.form.getlist('effects'),
169
+ request.form.get('isolate_vocals') == 'true',
170
+ request.form.get('preset'),
171
+ request.form.get('export_format')
172
+ )
173
+
174
+ return jsonify({
175
+ 'success': True,
176
+ 'message': 'Audio processed successfully',
177
+ 'output_path': output_path,
178
+ 'waveform_image': waveform_image,
179
+ 'session_log': session_log,
180
+ 'genre': genre,
181
+ 'status': status
182
+ })
183
+
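A minimal client-side sketch for this endpoint, assuming the Flask app is running locally on port 7860 (as configured at the bottom of the file) and a hypothetical input.wav:

import requests

with open("input.wav", "rb") as fh:  # hypothetical test file
    resp = requests.post(
        "http://localhost:7860/process-audio",
        files={"audio": fh},
        data={"effects": ["Normalize"], "isolate_vocals": "false",
              "preset": "Pop", "export_format": "WAV"},
    )
print(resp.json())  # includes output_path, waveform_image, session_log, genre, status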
184
+ # Define your Gradio interface
185
  with gr.Blocks(css="""
186
  body {
187
  font-family: 'Segoe UI', sans-serif;
 
214
  ''')
215
  gr.Markdown("### Upload, edit, export — powered by AI!")
216
 
 
217
  with gr.Tab("🎵 Single File Studio"):
218
  with gr.Row():
219
  with gr.Column(min_width=300):
220
  input_audio = gr.Audio(label="Upload Audio", type="filepath")
221
+ effect_checkbox = gr.CheckboxGroup(choices=list(eq_map.keys()), label="Apply Effects in Order")
222
+ preset_dropdown = gr.Dropdown(choices=list(eq_map.keys()), label="Select Preset", value="Pop")
223
  export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
224
  isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
225
  submit_btn = gr.Button("Process Audio")
 
229
  session_log_out = gr.Textbox(label="Session Log", lines=5)
230
  genre_out = gr.Textbox(label="Detected Genre", lines=1)
231
  status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
232
 
233
+ submit_btn.click(
234
+ fn=process_audio,
235
+ inputs=[input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format],
236
+ outputs=[output_audio, waveform_img, session_log_out, genre_out, status_box]
237
  )
238
 
239
+ # Run the Flask app
240
+ if __name__ == '__main__':
241
+ app.run(host='0.0.0.0', port=7860)