tee342 committed on
Commit 7b55c47 · verified · 1 Parent(s): db35bd8

Update app.py

Files changed (1)
  1. app.py +31 -865
app.py CHANGED
@@ -1,882 +1,39 @@
  import gradio as gr
- from pydub import AudioSegment
- from pydub.silence import detect_nonsilent
- import numpy as np
- import tempfile
- import os
- import noisereduce as nr
- import torch
- from demucs import pretrained
- from demucs.apply import apply_model
- import torchaudio
- from pathlib import Path
- import matplotlib.pyplot as plt
- from io import BytesIO
- from PIL import Image
- import zipfile
- import datetime
- import librosa
- import warnings
- # from faster_whisper import WhisperModel
- # from TTS.api import TTS
  import base64
- import pickle
  import json
- import soundfile as SF
-
- print("Gradio version:", gr.__version__)
- warnings.filterwarnings("ignore")
-
- # Helper to convert file to base64
- def file_to_base64_audio(file_path, mime_type="audio/wav"):
-     with open(file_path, "rb") as f:
-         data = f.read()
-     b64 = base64.b64encode(data).decode()
-     return f"data:{mime_type};base64,{b64}"
-
- # === Effects Definitions ===
- def apply_normalize(audio):
-     return audio.normalize()
-
- def apply_noise_reduction(audio):
-     samples, frame_rate = audiosegment_to_array(audio)
-     reduced = nr.reduce_noise(y=samples, sr=frame_rate)
-     return array_to_audiosegment(reduced, frame_rate, channels=audio.channels)
-
- def apply_compression(audio):
-     return audio.compress_dynamic_range()
-
- def apply_reverb(audio):
-     reverb = audio - 10
-     return audio.overlay(reverb, position=1000)
-
- def apply_pitch_shift(audio, semitones=-2):
-     new_frame_rate = int(audio.frame_rate * (2 ** (semitones / 12)))
-     samples = np.array(audio.get_array_of_samples())
-     resampled = np.interp(np.arange(0, len(samples), 2 ** (semitones / 12)), np.arange(len(samples)), samples).astype(np.int16)
-     return AudioSegment(resampled.tobytes(), frame_rate=new_frame_rate, sample_width=audio.sample_width, channels=audio.channels)
-
- def apply_echo(audio, delay_ms=500, decay=0.5):
-     echo = audio - 10
-     return audio.overlay(echo, position=delay_ms)
-
- def apply_stereo_widen(audio, pan_amount=0.3):
-     left = audio.pan(-pan_amount)
-     right = audio.pan(pan_amount)
-     return AudioSegment.from_mono_audiosegments(left, right)
-
- def apply_bass_boost(audio, gain=10):
-     return audio.low_pass_filter(100).apply_gain(gain)
-
- def apply_treble_boost(audio, gain=10):
-     return audio.high_pass_filter(4000).apply_gain(gain)
-
- def apply_limiter(audio, limit_dB=-1):
-     limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
-     return limiter.apply_gain(limit_dB)
-
- def apply_auto_gain(audio, target_dB=-20):
-     change = target_dB - audio.dBFS
-     return audio.apply_gain(change)
-
- def apply_vocal_distortion(audio, intensity=0.3):
-     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-     distorted = samples + intensity * np.sin(samples * 2 * np.pi / 32768)
-     return array_to_audiosegment(distorted.astype(np.int16), audio.frame_rate, channels=audio.channels)
-
- def apply_harmony(audio, shift_semitones=4):
-     shifted_up = apply_pitch_shift(audio, shift_semitones)
-     shifted_down = apply_pitch_shift(audio, -shift_semitones)
-     return audio.overlay(shifted_up).overlay(shifted_down)
-
- def apply_stage_mode(audio):
-     processed = apply_reverb(audio)
-     processed = apply_bass_boost(processed, gain=6)
-     return apply_limiter(processed, limit_dB=-2)
-
- def apply_bitcrush(audio, bit_depth=8):
-     samples = np.array(audio.get_array_of_samples())
-     max_val = 2 ** (bit_depth) - 1
-     downsampled = np.round(samples / (32768 / max_val)).astype(np.int16)
-     return array_to_audiosegment(downsampled, audio.frame_rate // 2, channels=audio.channels)
-
- # === Helper Functions ===
- def audiosegment_to_array(audio):
-     return np.array(audio.get_array_of_samples()), audio.frame_rate
-
- def array_to_audiosegment(samples, frame_rate, channels=1):
-     return AudioSegment(
-         samples.tobytes(),
-         frame_rate=int(frame_rate),
-         sample_width=samples.dtype.itemsize,
-         channels=channels
-     )
-
- # === Loudness Matching (EBU R128) ===
- try:
-     import pyloudnorm as pyln
- except ImportError:
-     print("Installing pyloudnorm...")
-     import subprocess
-     subprocess.run(["pip", "install", "pyloudnorm"])
-     import pyloudnorm as pyln
-
- def match_loudness(audio_path, target_lufs=-14.0):
-     meter = pyln.Meter(44100)
-     wav = AudioSegment.from_file(audio_path).set_frame_rate(44100)
-     samples = np.array(wav.get_array_of_samples()).astype(np.float64) / 32768.0
-     loudness = meter.integrated_loudness(samples)
-     gain_db = target_lufs - loudness
-     adjusted = wav + gain_db
-     out_path = os.path.join(tempfile.gettempdir(), "loudness_output.wav")
-     adjusted.export(out_path, format="wav")
-     return out_path
-
- # Define eq_map at the global scope
- eq_map = {
-     "Pop": [(200, 500, -3), (2000, 4000, +4)],
-     "EDM": [(60, 250, +6), (8000, 12000, +3)],
-     "Rock": [(1000, 3000, +4), (7000, 10000, -3)],
-     "Hip-Hop": [(20, 100, +6), (7000, 10000, -4)],
-     "Acoustic": [(100, 300, -3), (4000, 8000, +2)],
-     "Metal": [(100, 500, -4), (2000, 5000, +6), (7000, 12000, -3)],
-     "Trap": [(80, 120, +6), (3000, 6000, -4)],
-     "LoFi": [(20, 200, +3), (1000, 3000, -2)],
-     "Jazz": [(100, 400, +2), (1500, 3000, +1)],
-     "Classical": [(200, 1000, +1), (3000, 6000, +2)],
-     "Chillhop": [(50, 200, +3), (2000, 5000, +1)],
-     "Ambient": [(100, 500, +4), (6000, 12000, +2)],
-     "Jazz Piano": [(100, 1000, +3), (2000, 5000, +2)],
-     "Trap EDM": [(60, 120, +6), (2000, 5000, -3)],
-     "Indie Rock": [(150, 400, +2), (2000, 5000, +3)],
-     "Lo-Fi Jazz": [(80, 200, +3), (2000, 4000, -1)],
-     "R&B": [(100, 300, +4), (2000, 4000, +3)],
-     "Soul": [(80, 200, +3), (1500, 3500, +4)],
-     "Funk": [(80, 200, +5), (1000, 3000, +3)],
-     "Default": []
- }
-
- # Auto-EQ per Genre function
- def auto_eq(audio, genre="Pop"):
-     from scipy.signal import butter, sosfilt
-
-     def band_eq(samples, sr, lowcut, highcut, gain):
-         sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
-         filtered = sosfilt(sos, samples)
-         return samples + gain * filtered
-
-     samples, sr = audiosegment_to_array(audio)
-     samples = samples.astype(np.float64)
-     for band in eq_map.get(genre, []):
-         low, high, gain = band
-         samples = band_eq(samples, sr, low, high, gain)
-     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
-
- from scipy.signal import butter, sosfilt
- def band_eq(samples, sr, lowcut, highcut, gain):
-     sos = butter(10, [lowcut, highcut], btype='band', output='sos', fs=sr)
-     filtered = sosfilt(sos, samples)
-     return samples + gain * filtered
-
- samples, sr = audiosegment_to_array(audio)
- samples = samples.astype(np.float64)
- for band in eq_map.get(genre, []):
-     low, high, gain = band
-     samples = band_eq(samples, sr, low, high, gain)
- return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
-
- # === Load Track Helpers ===
- def load_track_local(path, sample_rate, channels=2):
-     sig, rate = torchaudio.load(path)
-     if rate != sample_rate:
-         sig = torchaudio.functional.resample(sig, rate, sample_rate)
-     if channels == 1:
-         sig = sig.mean(0)
-     return sig
-
- def save_track(path, wav, sample_rate):
-     path = Path(path)
-     torchaudio.save(str(path), wav, sample_rate)
-
- # === Vocal Isolation Helpers ===
- def apply_vocal_isolation(audio_path):
-     model = pretrained.get_model(name='htdemucs')
-     wav = load_track_local(audio_path, model.samplerate, channels=2)
-     ref = wav.mean(0)
-     wav -= ref[:, None]
-     sources = apply_model(model, wav[None])[0]
-     wav += ref[:, None]
-     vocal_track = sources[3].cpu()
-     out_path = os.path.join(tempfile.gettempdir(), "vocals.wav")
-     save_track(out_path, vocal_track, model.samplerate)
-     return out_path
-
- # === Stem Splitting Function ===
- def stem_split(audio_path):
-     model = pretrained.get_model(name='htdemucs')
-     wav = load_track_local(audio_path, model.samplerate, channels=2)
-     sources = apply_model(model, wav[None])[0]
-     output_dir = tempfile.mkdtemp()
-     stem_paths = []
-     for i, name in enumerate(['drums', 'bass', 'other', 'vocals']):
-         path = os.path.join(output_dir, f"{name}.wav")
-         save_track(path, sources[i].cpu(), model.samplerate)
-         stem_paths.append(gr.File(value=path))
-     return stem_paths
-
- # === Process Audio Function – Fully Featured ===
- def process_audio(audio_file, selected_effects, isolate_vocals, preset_name, export_format):
-     status = "🔊 Loading audio..."
-     try:
-         # Load input audio file
-         audio = AudioSegment.from_file(audio_file)
-         status = "🛠 Applying effects..."
-
-         effect_map_real = {
-             "Noise Reduction": apply_noise_reduction,
-             "Compress Dynamic Range": apply_compression,
-             "Add Reverb": apply_reverb,
-             "Pitch Shift": lambda x: apply_pitch_shift(x),
-             "Echo": apply_echo,
-             "Stereo Widening": apply_stereo_widen,
-             "Bass Boost": apply_bass_boost,
-             "Treble Boost": apply_treble_boost,
-             "Normalize": apply_normalize,
-             "Limiter": lambda x: apply_limiter(x, limit_dB=-1),
-             "Auto Gain": lambda x: apply_auto_gain(x, target_dB=-20),
-             "Vocal Distortion": lambda x: apply_vocal_distortion(x),
-             "Stage Mode": apply_stage_mode
-         }
-
-         history = [audio]  # For undo functionality
-         for effect_name in selected_effects:
-             if effect_name in effect_map_real:
-                 audio = effect_map_real[effect_name](audio)
-                 history.append(audio)
-
-         status = "💾 Saving final audio..."
-         with tempfile.NamedTemporaryFile(delete=False, suffix=f".{export_format.lower()}") as f:
-             if isolate_vocals:
-                 temp_input = os.path.join(tempfile.gettempdir(), "input.wav")
-                 audio.export(temp_input, format="wav")
-                 vocal_path = apply_vocal_isolation(temp_input)
-                 final_audio = AudioSegment.from_wav(vocal_path)
-             else:
-                 final_audio = audio
-             output_path = f.name
-             final_audio.export(output_path, format=export_format.lower())
-
-         waveform_image = show_waveform(output_path)
-         genre = detect_genre(output_path)
-         session_log = generate_session_log(audio_file, selected_effects, isolate_vocals, export_format, genre)
-         status = "🎉 Done!"
-         return output_path, waveform_image, session_log, genre, status, history
-
-     except Exception as e:
-         status = f"❌ Error: {str(e)}"
-         return None, None, status, "", status, []
-
- # Waveform preview
- def show_waveform(audio_file):
-     try:
-         audio = AudioSegment.from_file(audio_file)
-         samples = np.array(audio.get_array_of_samples())
-         plt.figure(figsize=(10, 2))
-         plt.plot(samples[:10000], color="skyblue")
-         plt.axis("off")
-         buf = BytesIO()
-         plt.savefig(buf, format="png", bbox_inches="tight", dpi=100)
-         plt.close()
-         buf.seek(0)
-         return Image.open(buf)
-     except Exception:
-         return None
-
- # Genre detection stub
- def detect_genre(audio_path):
-     try:
-         y, sr = torchaudio.load(audio_path)
-         return "Speech"
-     except Exception:
-         return "Unknown"
-
- # Session log generator
- def generate_session_log(audio_path, effects, isolate_vocals, export_format, genre):
-     return json.dumps({
-         "timestamp": str(datetime.datetime.now()),
-         "filename": os.path.basename(audio_path),
-         "effects_applied": effects,
-         "isolate_vocals": isolate_vocals,
-         "export_format": export_format,
-         "detected_genre": genre
-     }, indent=2)
-
- # Preset Choices (30+ options)
- preset_choices = {
-     "Default": [],
-     "Clean Podcast": ["Noise Reduction", "Normalize"],
-     "Podcast Mastered": ["Noise Reduction", "Normalize", "Compress Dynamic Range"],
-     "Radio Ready": ["Bass Boost", "Treble Boost", "Limiter"],
-     "Music Production": ["Reverb", "Stereo Widening", "Pitch Shift"],
-     "ASMR Creator": ["Noise Gate", "Auto Gain", "Low-Pass Filter"],
-     "Voiceover Pro": ["Vocal Isolation", "EQ Match"],
-     "8-bit Retro": ["Bitcrusher", "Echo", "Mono Downmix"],
-     "🎙 Clean Vocal": ["Noise Reduction", "Normalize", "High Pass Filter (80Hz)"],
-     "🧪 Vocal Distortion": ["Vocal Distortion", "Reverb", "Compress Dynamic Range"],
-     "🎶 Singer's Harmony": ["Harmony", "Stereo Widening", "Pitch Shift"],
-     "🌫 ASMR Vocal": ["Auto Gain", "Low-Pass Filter (3000Hz)", "Noise Gate"],
-     "🎼 Stage Mode": ["Reverb", "Bass Boost", "Limiter"],
-     "🎵 Auto-Tune Style": ["Pitch Shift (+1 semitone)", "Normalize", "Treble Boost"],
-     "🎤 R&B Vocal": ["Noise Reduction", "Bass Boost (100-300Hz)", "Treble Boost (2000-4000Hz)"],
-     "💃 Soul Vocal": ["Noise Reduction", "Bass Boost (80-200Hz)", "Treble Boost (1500-3500Hz)"],
-     "🕺 Funk Groove": ["Bass Boost (80-200Hz)", "Treble Boost (1000-3000Hz)"],
-     "Studio Master": ["Noise Reduction", "Normalize", "Bass Boost", "Treble Boost", "Limiter"],
-     "Podcast Voice": ["Noise Reduction", "Auto Gain", "High Pass Filter (85Hz)"],
-     "Lo-Fi Chill": ["Noise Gate", "Low-Pass Filter (3000Hz)", "Mono Downmix", "Bitcrusher"],
-     "Vocal Clarity": ["Noise Reduction", "EQ Match", "Reverb", "Auto Gain"],
-     "Retro Game Sound": ["Bitcrusher", "Echo", "Mono Downmix"],
-     "Live Stream Optimized": ["Noise Reduction", "Auto Gain", "Saturation", "Normalize"],
-     "Deep Bass Trap": ["Bass Boost (60-120Hz)", "Low-Pass Filter (200Hz)", "Limiter"],
-     "8-bit Voice": ["Bitcrusher", "Pitch Shift (-4 semitones)", "Mono Downmix"],
-     "Pop Vocal": ["Noise Reduction", "Normalize", "EQ Match (Pop)", "Auto Gain"],
-     "EDM Lead": ["Noise Reduction", "Tape Saturation", "Stereo Widening", "Limiter"],
-     "Hip-Hop Beat": ["Bass Boost (60-200Hz)", "Treble Boost (7000-10000Hz)", "Compression"],
-     "ASMR Whisper": ["Noise Gate", "Auto Gain", "Low-Pass Filter (5000Hz)"],
-     "Jazz Piano Clean": ["Noise Reduction", "EQ Match (Jazz Piano)", "Normalize"],
-     "Metal Guitar": ["Noise Reduction", "EQ Match (Metal)", "Compression"],
-     "Podcast Intro": ["Echo", "Reverb", "Pitch Shift (+1 semitone)"],
-     "Vintage Radio": ["Bitcrusher", "Low-Pass Filter (4000Hz)", "Saturation"],
-     "Speech Enhancement": ["Noise Reduction", "High Pass Filter (100Hz)", "Normalize", "Auto Gain"],
-     "Nightcore Speed": ["Pitch Shift (+3 semitones)", "Time Stretch (1.2x)", "Treble Boost"],
-     "Robot Voice": ["Pitch Shift (-12 semitones)", "Bitcrusher", "Low-Pass Filter (2000Hz)"],
-     "Underwater Effect": ["Low-Pass Filter (1000Hz)", "Reverb", "Echo"],
-     "Alien Voice": ["Pitch Shift (+7 semitones)", "Tape Saturation", "Echo"],
-     "Cinematic Voice": ["Reverb", "Limiter", "Bass Boost", "Auto Gain"],
-     "Phone Call Sim": ["Low-Pass Filter (3400Hz)", "Noise Gate", "Compression"],
-     "AI Generated Voice": ["Pitch Shift", "Vocal Distortion"],
- }
-
- preset_names = list(preset_choices.keys())
-
- # Batch Processing
- def batch_process_audio(files, selected_effects, isolate_vocals, preset_name, export_format):
-     try:
-         output_dir = tempfile.mkdtemp()
-         results = []
-         session_logs = []
-         for file in files:
-             processed_path, _, log, _, _ = process_audio(file.name, selected_effects, isolate_vocals, preset_name, export_format)[0:5]
-             results.append(processed_path)
-             session_logs.append(log)
-         zip_path = os.path.join(tempfile.gettempdir(), "batch_output.zip")
-         with zipfile.ZipFile(zip_path, 'w') as zipf:
-             for i, res in enumerate(results):
-                 filename = f"processed_{i}.{export_format.lower()}"
-                 zipf.write(res, filename)
-                 zipf.writestr(f"session_info_{i}.json", session_logs[i])
-         return zip_path, "📦 ZIP created successfully!"
-     except Exception as e:
-         return None, f"❌ Batch processing failed: {str(e)}"
-
- # AI Remastering
- def ai_remaster(audio_path):
-     try:
-         audio = AudioSegment.from_file(audio_path)
-         samples, sr = audiosegment_to_array(audio)
-         reduced = nr.reduce_noise(y=samples, sr=sr)
-         cleaned = array_to_audiosegment(reduced, sr, channels=audio.channels)
-         cleaned_wav_path = os.path.join(tempfile.gettempdir(), "cleaned.wav")
-         cleaned.export(cleaned_wav_path, format="wav")
-         isolated_path = apply_vocal_isolation(cleaned_wav_path)
-         final_path = ai_mastering_chain(isolated_path, genre="Pop", target_lufs=-14.0)
-         return final_path
-     except Exception as e:
-         print(f"Remastering Error: {str(e)}")
-         return None
-
- def ai_mastering_chain(audio_path, genre="Pop", target_lufs=-14.0):
-     audio = AudioSegment.from_file(audio_path)
-     audio = auto_eq(audio, genre=genre)
-     audio = match_loudness(audio_path, target_lufs=target_lufs)
-     audio = apply_stereo_widen(audio, pan_amount=0.3)
-     out_path = os.path.join(tempfile.gettempdir(), "mastered_output.wav")
-     audio.export(out_path, format="wav")
-     return out_path
-
- # Harmonic Saturation
- def harmonic_saturation(audio, saturation_type="Tube", intensity=0.2):
-     samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-     if saturation_type == "Tube":
-         saturated = np.tanh(intensity * samples)
-     elif saturation_type == "Tape":
-         saturated = np.where(samples > 0, 1 - np.exp(-intensity * samples), -1 + np.exp(intensity * samples))
-     elif saturation_type == "Console":
-         saturated = np.clip(samples, -32768, 32768) * intensity
-     elif saturation_type == "Mix Bus":
-         saturated = np.log1p(np.abs(samples)) * np.sign(samples) * intensity
-     else:
-         saturated = samples
-     return array_to_audiosegment(saturated.astype(np.int16), audio.frame_rate, channels=audio.channels)
-
- # Vocal Formant Correction
- def formant_correct(audio, shift=1.0):
-     samples, sr = audiosegment_to_array(audio)
-     corrected = librosa.effects.pitch_shift(samples, sr=sr, n_steps=shift)
-     return array_to_audiosegment(corrected.astype(np.int16), sr, channels=audio.channels)
-
- # Voice Swap
- def clone_voice(source_audio, reference_audio):
-     source = AudioSegment.from_file(source_audio)
-     ref = AudioSegment.from_file(reference_audio)
-     mixed = source.overlay(ref - 10)
-     out_path = os.path.join(tempfile.gettempdir(), "cloned_output.wav")
-     mixed.export(out_path, format="wav")
-     return out_path
-
- # Save/Load Mix Session (.aiproj)
- def save_project(audio, preset, effects):
-     project_data = {
-         "audio": AudioSegment.from_file(audio).raw_data,
-         "preset": preset,
-         "effects": effects
-     }
-     out_path = os.path.join(tempfile.gettempdir(), "project.aiproj")
-     with open(out_path, "wb") as f:
-         pickle.dump(project_data, f)
-     return out_path
-
- def load_project(project_file):
-     with open(project_file.name, "rb") as f:
-         data = pickle.load(f)
-     return data["preset"], data["effects"]
-
- # Prompt-Based Editing
- def process_prompt(audio, prompt):
-     return apply_noise_reduction(audio)
-
- # Vocal Pitch Correction
- def auto_tune_vocal(audio_path, target_key="C"):
-     try:
-         audio = AudioSegment.from_file(audio_path)
-         semitones = key_to_semitone(target_key)
-         tuned_audio = apply_pitch_shift(audio, semitones)
-         out_path = os.path.join(tempfile.gettempdir(), "autotuned_output.wav")
-         tuned_audio.export(out_path, format="wav")
-         return out_path
-     except Exception as e:
-         print(f"Auto-Tune Error: {e}")
-         return None
-
- def key_to_semitone(key="C"):
-     keys = {"C": 0, "C#": 1, "D": 2, "D#": 3, "E": 4, "F": 5,
-             "F#": 6, "G": 7, "G#": 8, "A": 9, "A#": 10, "B": 11}
-     return keys.get(key, 0)
-
- # Loop Section Tool
- def loop_section(audio_path, start_ms, end_ms, loops=2):
-     audio = AudioSegment.from_file(audio_path)
-     section = audio[start_ms:end_ms]
-     looped = section * loops
-     out_path = os.path.join(tempfile.gettempdir(), "looped_output.wav")
-     looped.export(out_path, format="wav")
-     return out_path
-
- # Frequency Spectrum Visualization
- def visualize_spectrum(audio_path):
-     y, sr = torchaudio.load(audio_path)
-     y_np = y.numpy().flatten()
-     stft = librosa.stft(y_np)
-     db = librosa.amplitude_to_db(abs(stft))
-     plt.figure(figsize=(10, 4))
-     img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-     plt.colorbar(img, format="%+2.0f dB")
-     plt.title("Frequency Spectrum")
-     plt.tight_layout()
-     buf = BytesIO()
-     plt.savefig(buf, format="png")
-     plt.close()
-     buf.seek(0)
-     return Image.open(buf)
-
- # A/B Compare
- def compare_ab(track1_path, track2_path):
-     return track1_path, track2_path
-
- # DAW Template Export
- def generate_ableton_template(stems):
-     template = {
-         "format": "Ableton Live",
-         "stems": [os.path.basename(s) for s in stems],
-         "effects": ["Reverb", "EQ", "Compression"],
-         "tempo": 128,
-         "title": "Studio Pulse Project"
-     }
-     out_path = os.path.join(tempfile.gettempdir(), "ableton_template.json")
-     with open(out_path, "w") as f:
-         json.dump(template, f, indent=2)
-     return out_path
-
- # Export Full Mix ZIP
- def export_full_mix(stems, final_mix):
-     zip_path = os.path.join(tempfile.gettempdir(), "full_export.zip")
-     with zipfile.ZipFile(zip_path, "w") as zipf:
-         for i, stem in enumerate(stems):
-             zipf.write(stem, f"stem_{i}.wav")
-         zipf.write(final_mix, "final_mix.wav")
-     return zip_path
-
- # Text-to-Sound
-
- # Main UI
- with gr.Blocks(css="""
- body {
-     font-family: 'Segoe UI', sans-serif;
-     background-color: #1f2937;
-     color: white;
-     padding: 20px;
- }
- .studio-header {
-     text-align: center;
-     margin-bottom: 30px;
-     animation: float 3s ease-in-out infinite;
- }
- @keyframes float {
-     0%, 100% { transform: translateY(0); }
-     50% { transform: translateY(-10px); }
- }
- .gr-button {
-     background-color: #2563eb !important;
-     color: white !important;
-     border-radius: 10px;
-     padding: 10px 20px;
-     box-shadow: 0 0 10px #2563eb44;
-     border: none;
- }
- """) as demo:
-     gr.HTML('''
-         <div class="studio-header">
-             <h3>Where Your Audio Meets Intelligence</h3>
-         </div>
-     ''')
-     gr.Markdown("### Upload, edit, export — powered by AI!")
-
-     # --- Single File Studio Tab ---
-     with gr.Tab("🎵 Single File Studio"):
-         with gr.Row():
-             with gr.Column(min_width=300):
-                 input_audio = gr.Audio(label="Upload Audio", type="filepath")
-                 effect_checkbox = gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order")
-                 preset_dropdown = gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0])
-                 export_format = gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-                 isolate_vocals = gr.Checkbox(label="Isolate Vocals After Effects")
-                 submit_btn = gr.Button("Process Audio")
-             with gr.Column(min_width=300):
-                 output_audio = gr.Audio(label="Processed Audio", type="filepath")
-                 waveform_img = gr.Image(label="Waveform Preview")
-                 session_log_out = gr.Textbox(label="Session Log", lines=5)
-                 genre_out = gr.Textbox(label="Detected Genre", lines=1)
-                 status_box = gr.Textbox(label="Status", value="✅ Ready", lines=1)
-         submit_btn.click(fn=process_audio, inputs=[
-             input_audio, effect_checkbox, isolate_vocals, preset_dropdown, export_format
-         ], outputs=[
-             output_audio, waveform_img, session_log_out, genre_out, status_box
-         ])
-
-     # --- Remix Mode – Stem Splitting + Per-Stem Effects ===
-     with gr.Tab("🎛 Remix Mode"):
-         with gr.Row():
-             with gr.Column(min_width=200):
-                 input_audio_remix = gr.Audio(label="Upload Music Track", type="filepath")
-                 split_button = gr.Button("Split Into Drums, Bass, Vocals, etc.")
-             with gr.Column(min_width=400):
-                 stem_outputs = [
-                     gr.File(label="Vocals"),
-                     gr.File(label="Drums"),
-                     gr.File(label="Bass"),
-                     gr.File(label="Other")
-                 ]
-         split_button.click(fn=stem_split, inputs=[input_audio_remix], outputs=stem_outputs)
-
-     # --- AI Remastering Tab – Now Fixed & Working ===
-     with gr.Tab("🔮 AI Remastering"):
-         gr.Interface(
-             fn=ai_remaster,
-             inputs=gr.Audio(label="Upload Low-Quality Recording", type="filepath"),
-             outputs=gr.Audio(label="Studio-Grade Output", type="filepath"),
-             title="Transform Low-Quality Recordings to Studio Sound",
-             description="Uses noise reduction, vocal isolation, and mastering to enhance old recordings.",
-             allow_flagging="never"
-         )
-
-     # --- Harmonic Saturation / Exciter – Now Included ===
-     with gr.Tab("🧬 Harmonic Saturation"):
-         gr.Interface(
-             fn=harmonic_saturation,
-             inputs=[
-                 gr.Audio(label="Upload Track", type="filepath"),
-                 gr.Dropdown(choices=["Tube", "Tape", "Console", "Mix Bus"], label="Saturation Type", value="Tube"),
-                 gr.Slider(minimum=0.1, maximum=1.0, value=0.2, label="Intensity")
-             ],
-             outputs=gr.Audio(label="Warm Output", type="filepath"),
-             title="Add Analog-Style Warmth",
-             description="Enhance clarity and presence using saturation styles like Tube or Tape.",
-             allow_flagging="never"
-         )
-
-     # --- Vocal Doubler / Harmonizer – Added Back ===
-     with gr.Tab("🎧 Vocal Doubler / Harmonizer"):
-         gr.Interface(
-             fn=lambda x: apply_harmony(x),
-             inputs=gr.Audio(label="Upload Vocal Clip", type="filepath"),
-             outputs=gr.Audio(label="Doubled Output", type="filepath"),
-             title="Add Vocal Doubling / Harmony",
-             description="Enhance vocals with doubling or harmony"
-         )
-
-     # --- Batch Processing – Full Support ===
-     with gr.Tab("🔊 Batch Processing"):
-         gr.Interface(
-             fn=batch_process_audio,
-             inputs=[
-                 gr.File(label="Upload Multiple Files", file_count="multiple"),
-                 gr.CheckboxGroup(choices=preset_choices["Default"], label="Apply Effects in Order"),
-                 gr.Checkbox(label="Isolate Vocals After Effects"),
-                 gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
-                 gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
-             ],
-             outputs=[
-                 gr.File(label="Download ZIP of All Processed Files"),
-                 gr.Textbox(label="Status", value="✅ Ready", lines=1)
-             ],
-             title="Batch Audio Processor",
-             description="Upload multiple files, apply effects in bulk, and download all results in a single ZIP.",
-             flagging_mode="never",
-             submit_btn="Process All Files"
-         )
-
-     # --- Vocal Pitch Correction – Auto-Tune Style ===
-     with gr.Tab("🎤 AI Auto-Tune"):
-         gr.Interface(
-             fn=auto_tune_vocal,
-             inputs=[
-                 gr.File(label="Source Voice Clip"),
-                 gr.Textbox(label="Target Key", value="C", lines=1)
-             ],
-             outputs=gr.Audio(label="Pitch-Corrected Output", type="filepath"),
-             title="AI Auto-Tune",
-             description="Correct vocal pitch automatically using AI"
-         )
-
-     # --- Frequency Spectrum Tab – Real-time Visualizer ===
-     with gr.Tab("📊 Frequency Spectrum"):
-         gr.Interface(
-             fn=visualize_spectrum,
-             inputs=gr.Audio(label="Upload Track", type="filepath"),
-             outputs=gr.Image(label="Spectrum Analysis")
-         )
-
-     # --- Loudness Graph Tab – EBU R128 Matching ===
-     with gr.Tab("📈 Loudness Graph"):
-         gr.Interface(
-             fn=match_loudness,
-             inputs=[
-                 gr.Audio(label="Upload Track", type="filepath"),
-                 gr.Slider(minimum=-24, maximum=-6, value=-14, label="Target LUFS")
-             ],
-             outputs=gr.Audio(label="Normalized Output", type="filepath"),
-             title="Match Loudness Across Tracks",
-             description="Ensure consistent volume using EBU R128 standard"
-         )
-
-     # --- Save/Load Mix Session (.aiproj) – Added Back ===
-     with gr.Tab("📁 Save/Load Project"):
-         with gr.Row():
-             with gr.Column(min_width=300):
-                 gr.Interface(
-                     fn=save_project,
-                     inputs=[
-                         gr.File(label="Original Audio"),
-                         gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
-                         gr.CheckboxGroup(choices=preset_choices["Default"], label="Applied Effects")
-                     ],
-                     outputs=gr.File(label="Project File (.aiproj)")
-                 )
-             with gr.Column(min_width=300):
-                 gr.Interface(
-                     fn=load_project,
-                     inputs=gr.File(label="Upload .aiproj File"),
-                     outputs=[
-                         gr.Dropdown(choices=preset_names, label="Loaded Preset"),
-                         gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects")
-                     ],
-                     title="Resume Last Project",
-                     description="Load your saved session"
-                 )
-
-     # --- Prompt-Based Editing Tab – Added Back ===
-     with gr.Tab("🧠 Prompt-Based Editing"):
-         gr.Interface(
-             fn=process_prompt,
-             inputs=[
-                 gr.File(label="Upload Audio", type="filepath"),
-                 gr.Textbox(label="Describe What You Want", lines=5)
-             ],
-             outputs=gr.Audio(label="Edited Output", type="filepath"),
-             title="Type Your Edits – AI Does the Rest",
-             description="Say what you want done and let AI handle it.",
-             allow_flagging="never"
-         )
-
-     # --- Custom EQ Editor ===
-     with gr.Tab("🎛 Custom EQ Editor"):
-         gr.Interface(
-             fn=auto_eq,
-             inputs=[
-                 gr.Audio(label="Upload Track", type="filepath"),
-                 gr.Dropdown(choices=list(eq_map.keys()), label="Genre", value="Pop")
-             ],
-             outputs=gr.Audio(label="EQ-Enhanced Output", type="filepath"),
-             title="Custom EQ by Genre",
-             description="Apply custom EQ based on genre"
-         )
-
-     # --- A/B Compare ===
-     with gr.Tab("🎯 A/B Compare"):
-         gr.Interface(
-             fn=compare_ab,
-             inputs=[
-                 gr.Audio(label="Version A", type="filepath"),
-                 gr.Audio(label="Version B", type="filepath")
-             ],
-             outputs=[
-                 gr.Audio(label="Version A", type="filepath"),
-                 gr.Audio(label="Version B", type="filepath")
-             ],
-             title="Compare Two Versions",
-             description="Hear two mixes side-by-side",
-             allow_flagging="never"
-         )
-
-     # --- Loop Playback ===
-     with gr.Tab("🔁 Loop Playback"):
-         gr.Interface(
-             fn=loop_section,
-             inputs=[
-                 gr.Audio(label="Upload Track", type="filepath"),
-                 gr.Slider(minimum=0, maximum=30000, step=100, value=5000, label="Start MS"),
-                 gr.Slider(minimum=100, maximum=30000, step=100, value=10000, label="End MS"),
-                 gr.Slider(minimum=1, maximum=10, value=2, label="Repeat Loops")
-             ],
-             outputs=gr.Audio(label="Looped Output", type="filepath"),
-             title="Repeat a Section",
-             description="Useful for editing a specific part"
-         )
-
-     # --- Share Effect Chain Tab – Now Defined! ===
-     with gr.Tab("🔗 Share Effect Chain"):
-         gr.Interface(
-             fn=lambda x: json.dumps(x),
-             inputs=gr.CheckboxGroup(choices=preset_choices["Default"]),
-             outputs=gr.Textbox(label="Share Code", lines=2),
-             title="Copy/Paste Effect Chain",
-             description="Share your setup via link/code"
-         )
-
-     with gr.Tab("📥 Load Shared Chain"):
-         gr.Interface(
-             fn=json.loads,
-             inputs=gr.Textbox(label="Paste Shared Code", lines=2),
-             outputs=gr.CheckboxGroup(choices=preset_choices["Default"], label="Loaded Effects"),
-             title="Restore From Shared Chain",
-             description="Paste shared effect chain JSON to restore settings"
-         )
-
-     # --- Keyboard Shortcuts Tab ===
-     with gr.Tab("⌨ Keyboard Shortcuts"):
-         gr.Markdown("""
-         ### Keyboard Controls
-         - `Ctrl + Z`: Undo last effect
-         - `Ctrl + Y`: Redo
-         - `Spacebar`: Play/Stop playback
-         - `Ctrl + S`: Save current session
-         - `Ctrl + O`: Open session
-         - `Ctrl + C`: Copy effect chain
-         - `Ctrl + V`: Paste effect chain
-         """)
-
-     # --- Vocal Formant Correction – Now Defined! ===
-     with gr.Tab("🧑‍🎤 Vocal Formant Correction"):
-         gr.Interface(
-             fn=formant_correct,
-             inputs=[
-                 gr.Audio(label="Upload Vocal Track", type="filepath"),
-                 gr.Slider(minimum=-2, maximum=2, value=1.0, label="Formant Shift")
-             ],
-             outputs=gr.Audio(label="Natural-Sounding Vocal", type="filepath"),
-             title="Preserve Vocal Quality During Pitch Shift",
-             description="Make pitch-shifted vocals sound more human"
-         )
-
-     # --- Voice Swap / Cloning – New Tab ===
-     with gr.Tab("🔁 Voice Swap / Cloning"):
-         gr.Interface(
-             fn=clone_voice,
-             inputs=[
-                 gr.File(label="Source Voice Clip"),
-                 gr.File(label="Reference Voice")
-             ],
-             outputs=gr.Audio(label="Converted Output", type="filepath"),
-             title="Swap Voices Using AI",
-             description="Clone or convert voice from one to another"
-         )
-
-     # --- DAW Template Export – Now Included ===
-     with gr.Tab("🎛 DAW Template Export"):
-         gr.Interface(
-             fn=generate_ableton_template,
-             inputs=[gr.File(label="Upload Stems", file_count="multiple")],
-             outputs=gr.File(label="DAW Template (.json/.als/.flp)")
-         )
-
-     # --- Export Full Mix ZIP – Added Back ===
-     with gr.Tab("📁 Export Full Mix ZIP"):
-         gr.Interface(
-             fn=export_full_mix,
-             inputs=[
-                 gr.File(label="Stems", file_count="multiple"),
-                 gr.File(label="Final Mix")
-             ],
-             outputs=gr.File(label="Full Mix Archive (.zip)"),
-             title="Export Stems + Final Mix Together",
-             description="Perfect for sharing with producers or archiving"
-         )
-
- # Launch Gradio App
- demo.launch()

- # === Hugging Face API Integration ===
  def hf_api_process(audio_data_url, effects_json, isolate, preset, export_format):
      try:
-         import base64, tempfile, json
-         from pydub import AudioSegment
-         header, base64_data = audio_data_url.split(",", 1)
-         audio_bytes = base64.b64decode(base64_data)
-         suffix = ".mp3" if "mpeg" in header else ".wav"
-         with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
-             f.write(audio_bytes)
          input_path = f.name

          effects = json.loads(effects_json) if isinstance(effects_json, str) else effects_json
          output_path, *_ = process_audio(input_path, effects, isolate, preset, export_format)

          with open(output_path, "rb") as f:
-             out_b64 = base64.b64encode(f.read()).decode("utf-8")
-             mime = "audio/wav" if export_format.lower() == "wav" else "audio/mpeg"
          return f"data:{mime};base64,{out_b64}"

      except Exception as e:
          return f"Error: {str(e)}"

- # Add standalone API interface for Hugging Face to access
- gr.Interface(
      fn=hf_api_process,
      inputs=[
          gr.Text(label="Audio Base64 Data URL"),
@@ -887,4 +44,13 @@ gr.Interface(
      ],
      outputs=gr.Text(label="Processed Audio as Base64 URL"),
      allow_flagging="never"
- ).launch(inline=False, share=False)
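For reference, a minimal client-side sketch of how a caller might drive the `hf_api_process` endpoint above. The Space URL is a placeholder, and the generic `/api/predict` route is Gradio's default JSON API for a single-function app; both are assumptions about deployment, not something this commit guarantees.

```python
# Hypothetical client for the base64 API; SPACE_URL and the /api/predict
# route are assumptions, not part of this commit.
import base64
import json
import requests

SPACE_URL = "https://example-space.hf.space"  # placeholder

def encode_data_url(path: str) -> str:
    # Mirrors file_to_base64_audio from the removed code
    mime = "audio/mpeg" if path.endswith(".mp3") else "audio/wav"
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f"data:{mime};base64,{b64}"

payload = {
    "data": [
        encode_data_url("input.wav"),              # audio_data_url
        json.dumps(["Noise Reduction", "Normalize"]),  # effects_json
        False,                                     # isolate
        "Default",                                 # preset
        "WAV",                                     # export_format
    ]
}
resp = requests.post(f"{SPACE_URL}/api/predict", json=payload, timeout=300)
out_data_url = resp.json()["data"][0]

# Decode the returned data URL back into an audio file
header, b64 = out_data_url.split(",", 1)
with open("processed.wav", "wb") as f:
    f.write(base64.b64decode(b64))
```

What remains of app.py after this commit, the `+` side of the diff, follows.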
  import gradio as gr
  import base64
+ import tempfile
  import json
+ import os
+ from pydub import AudioSegment
+ from main_processing_module import process_audio  # Or your actual import

+ # === Your existing Gradio UI setup (Blocks) ===
+ with gr.Blocks() as demo:
+     # Your full UI code: tabs, controls, process button, etc.
+     # e.g. gr.Markdown, gr.Upload, gr.Button --> do_demo()

+ # === The API function Hugging Face calls ===
  def hf_api_process(audio_data_url, effects_json, isolate, preset, export_format):
      try:
+         header, b64 = audio_data_url.split(",", 1)
+         data = base64.b64decode(b64)
+         ext = ".mp3" if "mpeg" in header else ".wav"
+         with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as f:
+             f.write(data)
          input_path = f.name
+
          effects = json.loads(effects_json) if isinstance(effects_json, str) else effects_json
          output_path, *_ = process_audio(input_path, effects, isolate, preset, export_format)
+
          with open(output_path, "rb") as f:
+             out_b64 = base64.b64encode(f.read()).decode()
+             mime = "audio/wav" if export_format.lower() == "wav" else "audio/mpeg"
          return f"data:{mime};base64,{out_b64}"
+
      except Exception as e:
          return f"Error: {str(e)}"

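A quick local smoke test for the function above can round-trip a short file through the same data-URL format. This is an illustrative snippet, not part of the commit; `sample.wav` is a stand-in for any audio file, and it assumes the `process_audio` import resolves.

```python
# Illustrative only: round-trip a local file through hf_api_process.
import base64, json

with open("sample.wav", "rb") as f:
    data_url = "data:audio/wav;base64," + base64.b64encode(f.read()).decode()

result = hf_api_process(data_url, json.dumps(["Normalize"]), False, "Default", "WAV")
# On failure the function returns a plain "Error: ..." string instead of a data URL
assert result.startswith("data:audio/"), result
```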
+ # === Combine both UI and API into a single launch ===
+ api_interface = gr.Interface(
      fn=hf_api_process,
      inputs=[
          gr.Text(label="Audio Base64 Data URL"),

      ],
      outputs=gr.Text(label="Processed Audio as Base64 URL"),
      allow_flagging="never"
+ )
+
+ # 🚀 Launch both UI and API together
+ demo.queue()
+ api_interface.queue()
+
+ gr.Parallel(
+     demo.configure(show_api=True),
+     api_interface.configure()
+ ).launch()
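As committed, this tail is unlikely to start: a `with gr.Blocks() as demo:` body containing only comments raises an IndentationError, `configure()` is not a method on Blocks or Interface, and `gr.Parallel` (the old `gradio.mix` helper, removed in Gradio 4) runs interfaces against a shared input rather than serving a UI alongside an API. A sketch of one arrangement that does launch both, assuming a recent Gradio:

```python
# Sketch, not the committed code: expose the Blocks UI and the API interface
# as two tabs of one app. gr.TabbedInterface and show_api are documented Gradio APIs.
with gr.Blocks() as demo:
    gr.Markdown("Studio UI goes here")  # placeholder so the block parses

app = gr.TabbedInterface([demo, api_interface], tab_names=["Studio", "API"])
app.queue().launch(show_api=True)
```

With this shape, the named endpoint is still reachable programmatically (for example via `gradio_client`), so the separate `.launch(inline=False, share=False)` call that the old file used is no longer needed.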