tee342 committed on
Commit
98f6048
·
verified ·
1 Parent(s): f6738b1

Update app.py

Files changed (1)
  1. app.py +218 -73
app.py CHANGED
@@ -99,15 +99,6 @@ def apply_limiter(audio, limit_dB=-1):
     limiter = audio._spawn(audio.raw_data, overrides={"frame_rate": audio.frame_rate})
     return limiter.apply_gain(limit_dB)
 
-def apply_phaser(audio):
-    return audio._spawn(audio.raw_data, overrides={"frame_rate": int(audio.frame_rate * 1.1)})
-
-def apply_bitcrush(audio, bit_depth=8):
-    samples = np.array(audio.get_array_of_samples()).astype(np.float32)
-    max_val = np.iinfo(np.int16).max
-    crushed = ((samples / max_val) * (2 ** bit_depth)).astype(np.int16)
-    return array_to_audiosegment(crushed, audio.frame_rate, channels=audio.channels)
-
 def apply_auto_gain(audio, target_dB=-20):
     change = target_dB - audio.dBFS
     return audio.apply_gain(change)
@@ -158,67 +149,46 @@ def auto_eq(audio, genre="Pop"):
 
     return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
 
-# === AI Voice Effects – Harmony / Doubling / Tuning ===
-def pitch_correct(audio, target_key="C", semitones=None):
-    if semitones is None:
-        # Detect key and calculate needed shift
-        semitones = 0  # Placeholder
-    return apply_pitch_shift(audio, semitones)
-
-def vocal_doubling(audio):
-    double1 = apply_pitch_shift(audio, 0.3)
-    double2 = apply_pitch_shift(audio, -0.3)
-    return audio.overlay(double1).overlay(double2)
-
-# === Prompt-Based Editing ===
-def process_prompt(audio_path, prompt):
-    prompt = prompt.lower()
-    audio = AudioSegment.from_file(audio_path)
-
-    if "noise" in prompt or "clean" in prompt:
-        audio = apply_noise_reduction(audio)
-
-    if "normalize" in prompt or "loud" in prompt:
-        audio = apply_normalize(audio)
-
-    if "bass" in prompt and ("boost" in prompt or "up" in prompt):
-        audio = apply_bass_boost(audio)
-
-    if "treble" in prompt or "highs" in prompt:
-        audio = apply_treble_boost(audio)
-
-    if "echo" in prompt or "reverb" in prompt:
-        audio = apply_reverb(audio)
-
-    if "pitch" in prompt and "correct" in prompt:
-        audio = pitch_correct(audio)
-
-    if "harmony" in prompt or "double" in prompt:
-        audio = vocal_doubling(audio)
-
-    out_path = os.path.join(tempfile.gettempdir(), "prompt_output.wav")
+# === Real-Time EQ Sliders ===
+def real_time_eq(audio, low_gain=0, mid_gain=0, high_gain=0):
+    samples, sr = audiosegment_to_array(audio)
+    samples = samples.astype(np.float64)
+
+    # Low EQ: 20–500Hz
+    sos_low = butter(10, [20, 500], btype='band', output='sos', fs=sr)
+    low = sosfilt(sos_low, samples) * (10 ** (low_gain / 20))
+
+    # Mid EQ: 500–4000Hz
+    sos_mid = butter(10, [500, 4000], btype='band', output='sos', fs=sr)
+    mid = sosfilt(sos_mid, samples) * (10 ** (mid_gain / 20))
+
+    # High EQ: 4kHz and up (a highpass takes a single cutoff, not a band)
+    sos_high = butter(10, 4000, btype='high', output='sos', fs=sr)
+    high = sosfilt(sos_high, samples) * (10 ** (high_gain / 20))
+
+    # Sum the independently gained bands (filtering the running sum would
+    # stack the filters) and clip before the int16 cast to avoid wrap-around
+    samples = np.clip(low + mid + high, np.iinfo(np.int16).min, np.iinfo(np.int16).max)
+    return array_to_audiosegment(samples.astype(np.int16), sr, channels=audio.channels)
+
+# === AI Suggest Presets Based on Genre ===
+genre_preset_map = {
+    "Speech": ["Clean Podcast", "Normalize"],
+    "Pop": ["Vocal Clarity", "Limiter", "Stereo Expansion"],
+    "EDM": ["Heavy Bass", "Stereo Expansion", "Limiter", "Phaser"],
+    "Rock": ["Distortion", "Punchy Mids", "Reverb"],
+    "Hip-Hop": ["Deep Bass", "Vocal Presence", "Saturation"]
+}
+
+def suggest_preset_by_genre(genre):
+    return genre_preset_map.get(genre, ["Default"])
+
+# === Create Karaoke Video from Audio + Lyrics ===
+def create_karaoke_video(audio_path, lyrics, bg_image=None):
+    # Placeholder for video generation
+    print(f"Creating karaoke video with lyrics: {lyrics}")
+    out_path = os.path.join(tempfile.gettempdir(), "karaoke_output.wav")
+    audio = AudioSegment.from_file(audio_path)
     audio.export(out_path, format="wav")
     return out_path
 
-# === Spectrum Analyzer + EQ Visualizer ===
-def visualize_spectrum(audio_path):
-    y, sr = torchaudio.load(audio_path)
-    y_np = y.numpy().flatten()
-
-    stft = librosa.stft(y_np)
-    db = librosa.amplitude_to_db(abs(stft))
-
-    plt.figure(figsize=(10, 4))
-    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
-    plt.colorbar(img, format="%+2.0f dB")
-    plt.title("Frequency Spectrum")
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png")
-    plt.close()
-    buf.seek(0)
-    return Image.open(buf)
-
 # === Vocal Isolation Helpers ===
 def load_track_local(path, sample_rate, channels=2):
     sig, rate = torchaudio.load(path)
@@ -433,7 +403,7 @@ def transcribe_audio(audio_path):
 # === TTS Tab ===
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
 
-def generate_tTS(text):
+def generate_tts(text):
     out_path = os.path.join(tempfile.gettempdir(), "tts_output.wav")
     tts.tts_to_file(text=text, file_path=out_path)
     return out_path
@@ -527,6 +497,28 @@ def diarize_and_transcribe(audio_path):
     except Exception as e:
         return f"⚠️ Diarization failed: {str(e)}"
 
+# === Real-Time Spectrum Analyzer + EQ Visualizer ===
+def visualize_spectrum(audio_path):
+    y, sr = torchaudio.load(audio_path)
+    y_np = y.numpy().flatten()
+    stft = librosa.stft(y_np)
+    db = librosa.amplitude_to_db(abs(stft))
+
+    plt.figure(figsize=(10, 4))
+    img = librosa.display.specshow(db, sr=sr, x_axis="time", y_axis="hz", cmap="magma")
+    plt.colorbar(img, format="%+2.0f dB")
+    plt.title("Frequency Spectrum")
+    plt.tight_layout()
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    plt.close()
+    buf.seek(0)
+    return Image.open(buf)
+
+# === Real-Time EQ Sliders ===
+def real_time_eq_slider(audio, low_gain, mid_gain, high_gain):
+    return real_time_eq(audio, low_gain, mid_gain, high_gain)
+
 # === UI ===
 effect_options = [
     "Noise Reduction",
@@ -619,16 +611,41 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
     # --- Genre Mastering Tab ===
     with gr.Tab("🎧 Genre Mastering"):
         gr.Interface(
-            fn=lambda audio, genre: apply_genre_preset(audio, genre),
+            fn=lambda audio, genre: auto_eq(audio, genre),
             inputs=[
                 gr.Audio(label="Upload Track", type="filepath"),
-                gr.Dropdown(choices=list(genre_presets.keys()), label="Select Genre", value="Pop")
+                gr.Dropdown(choices=list(genre_preset_map.keys()), label="Select Genre", value="Pop")
             ],
             outputs=gr.Audio(label="Mastered Output", type="filepath"),
             title="Genre-Specific Mastering",
             description="Apply professionally tuned mastering settings for popular music genres."
         )
 
+    # --- Real-Time EQ ===
+    with gr.Tab("🎛 Real-Time EQ"):
+        gr.Interface(
+            fn=real_time_eq_slider,
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="Low Gain (20–500Hz)"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="Mid Gain (500Hz–4kHz)"),
+                gr.Slider(minimum=-12, maximum=12, value=0, label="High Gain (4kHz+)"),
+            ],
+            outputs=gr.Audio(label="EQ'd Output", type="filepath"),
+            title="Adjust Frequency Bands Live",
+            description="Fine-tune your sound using real-time sliders for low, mid, and high frequencies."
+        )
+
+    # --- Spectrum Visualizer ===
+    with gr.Tab("📊 Frequency Spectrum"):
+        gr.Interface(
+            fn=visualize_spectrum,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Image(label="Spectrum Analysis"),
+            title="Real-Time Spectrum Analyzer",
+            description="See the frequency breakdown of your audio"
+        )
+
     # --- Prompt-Based Editing Tab ===
     with gr.Tab("🧠 Prompt-Based Editing"):
         gr.Interface(
@@ -643,14 +660,37 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             allow_flagging="never"
         )
 
-    # --- Spectrum Analyzer Tab ===
-    with gr.Tab("📊 Frequency Spectrum"):
+    # --- Vocal Presets for Singers ===
+    with gr.Tab("🎤 Vocal Presets for Singers"):
         gr.Interface(
-            fn=visualize_spectrum,
-            inputs=gr.Audio(label="Upload Track", type="filepath"),
-            outputs=gr.Image(label="Spectrum Analysis"),
-            title="Real-Time Spectrum Analyzer",
-            description="See the frequency breakdown of your audio",
+            fn=process_audio,
+            inputs=[
+                gr.Audio(label="Upload Vocal Track", type="filepath"),
+                gr.CheckboxGroup(choices=[
+                    "Noise Reduction",
+                    "Normalize",
+                    "Compress Dynamic Range",
+                    "Bass Boost",
+                    "Treble Boost",
+                    "Reverb",
+                    "Auto Gain",
+                    "Vocal Distortion",
+                    "Harmony",
+                    "Stage Mode"
+                ]),
+                gr.Checkbox(label="Isolate Vocals After Effects"),
+                gr.Dropdown(choices=preset_names, label="Select Vocal Preset", value=preset_names[0]),
+                gr.Dropdown(choices=["MP3", "WAV"], label="Export Format", value="MP3")
+            ],
+            outputs=[
+                gr.Audio(label="Processed Vocal", type="filepath"),
+                gr.Image(label="Waveform Preview"),
+                gr.Textbox(label="Session Log (JSON)", lines=5),
+                gr.Textbox(label="Detected Genre", lines=1),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Create Studio-Quality Vocal Tracks",
+            description="Apply singer-friendly presets and effects to enhance vocals.",
             allow_flagging="never"
         )
 
@@ -668,7 +708,7 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Clone voice from source to target speaker using AI"
         )
 
-    # --- Speaker Diarization (Who Spoke When?) ===
+    # --- Speaker Diarization ("Who Spoke When?") ===
     if diarize_pipeline:
         with gr.Tab("🧍‍♂️ Who Spoke When?"):
             gr.Interface(
@@ -738,4 +778,109 @@ with gr.Blocks(title="AI Audio Studio", css="style.css") as demo:
             description="Detect and trim silence at start/end or between words"
         )
 
+    # --- Save/Load Project File (.aiproj) ===
+    with gr.Tab("📁 Save/Load Project"):
+        gr.Interface(
+            fn=save_project,
+            inputs=[
+                gr.File(label="Original Audio"),
+                gr.Dropdown(choices=preset_names, label="Used Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Applied Effects")
+            ],
+            outputs=gr.File(label="Project File (.aiproj)"),
+            title="Save Everything Together",
+            description="Save your session, effects, and settings in one file to reuse later."
+        )
+
+        gr.Interface(
+            fn=load_project,
+            inputs=gr.File(label="Upload .aiproj File"),
+            outputs=[
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Resume Last Project",
+            description="Load your saved session"
+        )
+
+    # --- Cloud Project Sync (Premium Feature) ===
+    with gr.Tab("☁️ Cloud Project Sync"):
+        gr.Markdown("Save your projects online and resume them from any device.")
+
+        project_id = gr.Textbox(label="Project ID (optional)")
+        project_name = gr.Textbox(label="Project Name")
+        project_data = gr.State()
+
+        def cloud_save_project(audio, preset, effects, name, project_id=""):
+            # Simulated cloud saving; relies on `import pickle` at the top of app.py
+            project_data = {
+                "audio": AudioSegment.from_file(audio).raw_data,
+                "preset": preset,
+                "effects": effects
+            }
+            project_path = os.path.join(tempfile.gettempdir(), f"{name}.aiproj")
+            with open(project_path, "wb") as f:
+                pickle.dump(project_data, f)
+            return project_path, f"✅ Saved as '{name}'"
+
+        def cloud_load_project(project_id):
+            # Simulated cloud loading; values follow the output order below:
+            # no stored audio, then preset, then effect list
+            if not project_id:
+                return None, None, None
+            return None, "Default", ["Noise Reduction", "Normalize"]
+
+        gr.Interface(
+            fn=cloud_save_project,
+            inputs=[
+                gr.File(label="Upload Audio", type="filepath"),
+                gr.Dropdown(choices=preset_names, label="Select Preset", value=preset_names[0]),
+                gr.CheckboxGroup(choices=effect_options, label="Effects"),
+                gr.Textbox(label="Project Name"),
+                gr.Textbox(label="Project ID (Optional)")
+            ],
+            outputs=[
+                gr.File(label="Downloadable Project File"),
+                gr.Textbox(label="Status", value="✅ Ready", lines=1)
+            ],
+            title="Save to Cloud",
+            description="Save your project online and share it across devices."
+        )
+
+        gr.Interface(
+            fn=cloud_load_project,
+            inputs=gr.Textbox(label="Enter Project ID"),
+            outputs=[
+                gr.Audio(label="Loaded Audio", type="filepath"),
+                gr.Dropdown(choices=preset_names, label="Loaded Preset"),
+                gr.CheckboxGroup(choices=effect_options, label="Loaded Effects")
+            ],
+            title="Load from Cloud",
+            description="Resume a project from the cloud",
+            allow_flagging="never"
+        )
+
+    # --- AI Suggest Presets Based on Genre ===
+    with gr.Tab("🧠 AI Suggest Preset"):
+        gr.Interface(
+            fn=suggest_preset_by_genre,
+            inputs=gr.Audio(label="Upload Track", type="filepath"),
+            outputs=gr.Dropdown(choices=preset_names, label="Recommended Preset"),
+            title="AI Recommends Best Preset",
+            description="Upload a track and let AI recommend the best preset based on genre."
+        )
+
+    # --- Create Karaoke Video from Audio + Lyrics ===
+    with gr.Tab("📹 Create Karaoke Video"):
+        gr.Interface(
+            fn=create_karaoke_video,
+            inputs=[
+                gr.Audio(label="Upload Track", type="filepath"),
+                gr.Textbox(label="Lyrics", lines=10),
+                gr.File(label="Background (Optional)")
+            ],
+            outputs=gr.Video(label="Karaoke Video"),
+            title="Make Karaoke Videos from Audio + Lyrics",
+            description="Generate karaoke-style videos with real-time sync."
+        )
+
 demo.launch()
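
A note on the Genre Mastering and Real-Time EQ tabs: gr.Audio(type="filepath") passes a path string into auto_eq and real_time_eq_slider, but both underlying functions operate on pydub AudioSegments and return AudioSegments, while the outputs expect a filepath back. A minimal adapter sketch, reusing imports already in app.py; the helper name eq_from_path is illustrative and not part of this commit:

def eq_from_path(path, low_gain, mid_gain, high_gain):
    # Load the upload into pydub, run the AudioSegment-based EQ,
    # then export a temp WAV so gr.Audio(type="filepath") can serve it.
    audio = AudioSegment.from_file(path)
    processed = real_time_eq(audio, low_gain, mid_gain, high_gain)
    out_path = os.path.join(tempfile.gettempdir(), "eq_output.wav")
    processed.export(out_path, format="wav")
    return out_path

Passing fn=eq_from_path (and an equivalent wrapper around auto_eq) keeps both tab signatures unchanged.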
 