szili2011 committed on
Commit
4908947
·
verified ·
1 Parent(s): af5b23f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -61
app.py CHANGED
@@ -1,26 +1,23 @@
 
1
  import numpy as np
2
  import librosa
3
- from pydub import AudioSegment
4
- import gradio as gr
5
 
 
6
def load_audio(file):
    """Load the audio at path `file` and return (samples, sample_rate).

    `sr=None` tells librosa to keep the file's native sampling rate
    instead of resampling to its 22050 Hz default.
    """
    samples, sample_rate = librosa.load(file, sr=None)
    return samples, sample_rate
10
 
 
11
def get_segment(audio, sr):
    """Select a high-energy excerpt of ~1.5 s from `audio`.

    Detects note onsets, keeps the first onset whose local energy exceeds
    mean + 1 std, and returns the samples from 0.5 s before that onset up
    to 1 s after it (clamped to the signal bounds).

    Parameters
    ----------
    audio : np.ndarray
        Mono signal as returned by librosa.load.
    sr : int
        Sample rate of `audio`.

    Returns
    -------
    np.ndarray
        The selected excerpt; falls back to the first 2048 samples
        (~46 ms at 44.1 kHz) when no onsets are detected.
    """
    # Onset envelope + onset detection (librosa's default hop is 512 samples).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, backtrack=True
    )

    # No onsets at all: return the opening samples.
    if len(onset_frames) == 0:
        return audio[:2048]

    # Short-time energy over non-overlapping 2048-sample chunks.
    # (The original also ran librosa.piptrack and computed a dynamic range
    # here; both results were unused, so that expensive work is removed.)
    chunk = 2048
    energy = np.array(
        [np.sum(np.abs(audio[i:i + chunk] ** 2)) for i in range(0, len(audio), chunk)]
    )
    energy_threshold = np.mean(energy) + np.std(energy)

    # Onset frames use a 512-sample hop while `energy` is chunked at 2048
    # samples, so convert each frame to its energy-chunk index before
    # testing the threshold. (Indexing energy[frame] directly addressed the
    # wrong chunk and could run past the end of the array.)
    def _energy_chunk(frame):
        return int(librosa.frames_to_samples(frame)) // chunk

    relevant_onsets = [
        f for f in onset_frames
        if _energy_chunk(f) < len(energy) and energy[_energy_chunk(f)] > energy_threshold
    ]

    # Fall back to the first detected onset when none clears the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame)

    # One second of material after the onset, clamped to the signal length.
    segment_length = sr
    end_sample = min(start_sample + segment_length, len(audio))

    # Include up to 0.5 s of context before the onset for smoother edits.
    overlap_start = max(start_sample - int(sr * 0.5), 0)
    return audio[overlap_start:end_sample]
53
 
 
54
def extend_music(file, added_minutes):
    """Extend the audio in `file` by roughly `added_minutes` minutes.

    A representative segment is chosen with get_segment() and looped onto
    the end of the original recording.

    Parameters
    ----------
    file : str
        Path to the uploaded audio file.
    added_minutes : float
        How many minutes of looped material to append.

    Returns
    -------
    pydub.AudioSegment
        The original recording with the looped segment appended.
    """
    audio, sr = load_audio(file)

    # Representative segment chosen via onset/energy analysis.
    segment = get_segment(audio, sr)

    # Original recording in pydub form for concatenation/export.
    original_audio_segment = AudioSegment.from_file(file)

    # librosa yields float samples in [-1, 1]; convert to 16-bit mono PCM
    # so pydub interprets the bytes correctly. (The previous code appended
    # raw float32 bytes to an AudioSegment, which decodes as noise.)
    pcm = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
    segment_audio = AudioSegment(
        data=pcm.tobytes(), sample_width=2, frame_rate=int(sr), channels=1
    )

    # Repeat the segment only for the ADDED time, not the whole new
    # duration, so the extension is approximately `added_minutes` long.
    segment_seconds = len(segment) / sr
    num_repeats = (
        int((added_minutes * 60) // segment_seconds) if segment_seconds > 0 else 0
    )

    # AudioSegment supports `*` for repetition and `+` for concatenation.
    return original_audio_segment + segment_audio * num_repeats
76
 
77
def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded audio and return it as numpy.

    Parameters
    ----------
    file : str
        Path of the uploaded audio (gr.Audio(type="filepath")).
    added_minutes : float
        Extra minutes requested via the slider.

    Returns
    -------
    tuple[int, np.ndarray]
        (sample_rate, samples) as expected by gr.Audio(type="numpy").
    """
    extended_audio = extend_music(file, added_minutes)

    # Round-trip through an mp3 file so pydub handles the encoding.
    # NOTE(review): pydub's mp3 export requires ffmpeg on PATH — confirm
    # it is available in the deployment image.
    output_path = "extended_audio.mp3"
    extended_audio.export(output_path, format="mp3")

    # Load the exported audio back; keep the sample rate because
    # gr.Audio(type="numpy") expects a (sample_rate, data) tuple —
    # returning the bare array leaves the playback rate undefined.
    audio_output, sr = librosa.load(output_path, sr=None)
    return int(sr), audio_output
87
-
88
# Build the Gradio front end: one audio upload, one slider, one audio output.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),  # uploaded file is passed as a path
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),
    ],
    outputs=gr.Audio(type="numpy"),  # extended audio returned as numpy
    title="Advanced Music Extender",
    description=(
        "Upload an audio file, and this app will extend the music by "
        "cutting and appending a segment based on advanced audio features. "
        "Choose additional minutes to extend the audio duration."
    ),
)

iface.launch()
 
 
1
+ import gradio as gr
2
  import numpy as np
3
  import librosa
 
 
4
 
5
+ # Function to load audio using librosa
6
def load_audio(file):
    """Return (samples, sample_rate) for the audio at path `file`.

    Passing sr=None keeps the file's native sampling rate rather than
    resampling to librosa's 22050 Hz default.
    """
    return librosa.load(file, sr=None)
10
 
11
+ # Function to get a relevant audio segment based on onset detection
12
def get_segment(audio, sr):
    """Select a ~1 s high-energy excerpt from `audio` via onset detection.

    Detects note onsets, keeps the first onset whose local energy exceeds
    mean + 1 std, and returns one second of samples starting at that onset
    (clamped to the signal bounds).

    Parameters
    ----------
    audio : np.ndarray
        Mono signal as returned by librosa.load.
    sr : int
        Sample rate of `audio`.

    Returns
    -------
    np.ndarray
        The selected excerpt; falls back to the first 2048 samples
        (~46 ms at 44.1 kHz) when no onsets are detected.
    """
    # Onset envelope + onset detection (librosa's default hop is 512 samples).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, backtrack=True
    )

    # No onsets detected: fall back to the opening samples.
    if len(onset_frames) == 0:
        return audio[:2048]

    # Short-time energy over non-overlapping 2048-sample chunks.
    chunk = 2048
    energy = np.array(
        [np.sum(np.abs(audio[i:i + chunk] ** 2)) for i in range(0, len(audio), chunk)]
    )
    energy_threshold = np.mean(energy) + np.std(energy)

    # Onset frames use a 512-sample hop while `energy` is chunked at 2048
    # samples, so map each frame to its energy-chunk index before comparing
    # against the threshold. (The previous direct energy[f] lookup addressed
    # the wrong chunk; its bounds guard only hid the mismatch.)
    def _chunk_index(frame):
        return int(librosa.frames_to_samples(frame)) // chunk

    relevant_onsets = [
        f for f in onset_frames
        if _chunk_index(f) < len(energy) and energy[_chunk_index(f)] > energy_threshold
    ]

    # Fall back to the first onset when none clears the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame)

    # One second of samples, clamped to the signal length.
    segment_length = sr
    end_sample = min(start_sample + segment_length, len(audio))

    return audio[start_sample:end_sample]
 
 
 
 
 
 
40
 
41
+ # Function to extend music by adding silence
42
def extend_music(file, added_minutes):
    """Gradio callback: pad a chosen segment of the upload with silence.

    Parameters
    ----------
    file : str
        Path of the uploaded audio (gr.Audio(type="filepath")).
    added_minutes : float
        Minutes of silence to append, from the slider.

    Returns
    -------
    tuple[int, np.ndarray]
        (sample_rate, samples) — the tuple form gr.Audio(type="numpy")
        requires; returning a bare array leaves the playback rate
        undefined in the UI.

    NOTE(review): only the ~1 s segment chosen by get_segment() is kept —
    the rest of the uploaded audio is discarded before the silence is
    appended. Confirm that this, rather than extending the full track,
    is the intended behavior.
    """
    audio, sr = load_audio(file)

    # Representative segment chosen via onset/energy analysis.
    segment = get_segment(audio, sr)

    # added_minutes of silence, expressed in samples at the native rate.
    additional_samples = int(added_minutes * 60 * sr)
    extended_audio = np.concatenate([segment, np.zeros(additional_samples)])

    return int(sr), extended_audio
53
 
54
+ # Gradio UI setup
55
# Gradio UI: upload a file, choose extra minutes, get the extended audio back.
with gr.Blocks() as app:
    gr.Markdown("# Audio Extender")

    # NOTE(review): the `source=` keyword was removed in Gradio 4
    # (replaced by `sources=[...]`); this call assumes a Gradio 3.x
    # runtime — confirm the pinned version.
    audio_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
    added_minutes = gr.Slider(minimum=0, maximum=10, label="Additional Minutes")
    audio_output = gr.Audio(type="numpy", label="Extended Audio")

    submit_button = gr.Button("Extend Audio")
    submit_button.click(
        extend_music,
        inputs=[audio_input, added_minutes],
        outputs=audio_output,
    )

# Start the web server.
app.launch()