import numpy as np
import librosa
from pydub import AudioSegment
import gradio as gr


def load_audio(file_path):
    """Load an audio file at its native sample rate.

    Args:
        file_path: Path to an audio file readable by librosa.

    Returns:
        Tuple ``(audio, sr)`` — mono float samples and the sample rate.
    """
    audio, sr = librosa.load(file_path, sr=None)
    return audio, sr


def get_segment(audio, sr, hop_length=512):
    """Pick a musically relevant segment (~1.5 s) from ``audio``.

    Detects onsets, keeps those whose frame energy stands out
    (mean + 1 std), and returns one second of audio starting at the
    chosen onset plus 0.5 s of leading context.

    Args:
        audio: Mono float samples.
        sr: Sample rate in Hz.
        hop_length: Analysis hop in samples; must match the onset frame
            grid so frame indices map 1:1 onto the energy array.

    Returns:
        A slice of ``audio`` (numpy array).
    """
    # librosa >= 0.10 requires keyword arguments here (positional `y`
    # was removed).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, hop_length=hop_length, backtrack=True
    )

    # No onsets detected: fall back to the first second of audio.
    if len(onset_frames) == 0:
        return audio[: min(sr, len(audio))]

    # Per-frame energy on the SAME hop grid as the onset frames, so an
    # onset frame index is a valid index into `energy`.  (The original
    # code used 2048-sample blocks against 512-sample onset frames,
    # which indexed the wrong frame and could run out of bounds.)
    energy = np.array(
        [np.sum(audio[i : i + hop_length] ** 2) for i in range(0, len(audio), hop_length)]
    )

    # Threshold: a frame is "relevant" if its energy exceeds mean + std.
    energy_threshold = np.mean(energy) + np.std(energy)
    relevant_onsets = [
        f for f in onset_frames if f < len(energy) and energy[f] > energy_threshold
    ]

    # Fall back to the first detected onset if none pass the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame, hop_length=hop_length)

    segment_length = sr  # 1-second segment
    end_sample = min(start_sample + segment_length, len(audio))

    # 0.5 s of leading context for a smoother transition.
    overlap_start = max(start_sample - int(sr * 0.5), 0)
    return audio[overlap_start:end_sample]


def _segment_to_pydub(samples, sr):
    """Convert float samples in [-1, 1] to a 16-bit mono AudioSegment."""
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)


def extend_music(file_path, added_minutes):
    """Extend an audio file by repeatedly appending a detected segment.

    Args:
        file_path: Path to the source audio file.
        added_minutes: How many minutes of audio to append.

    Returns:
        A pydub ``AudioSegment`` roughly ``added_minutes`` longer than
        the original.
    """
    audio, sr = load_audio(file_path)
    segment = get_segment(audio, sr)

    original_audio_segment = AudioSegment.from_file(file_path)

    # Repeat enough times to cover the ADDED duration only.  (The
    # original divided the total new duration by the segment length,
    # which over-extended the output by the original track's length.)
    segment_seconds = len(segment) / sr
    added_seconds = added_minutes * 60
    num_repeats = int(added_seconds // segment_seconds) if segment_seconds > 0 else 0

    # Convert the float numpy segment to a real AudioSegment before
    # concatenating.  (The original appended raw float64 bytes to the
    # pydub object, which yields noise/garbage audio.)
    segment_audio = _segment_to_pydub(segment, sr)

    extended_audio = original_audio_segment
    for _ in range(num_repeats):
        extended_audio += segment_audio
    return extended_audio


def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded file and export it as MP3.

    Returns the path of the exported file for the Audio output widget.
    """
    extended_audio = extend_music(file, added_minutes)
    output_path = "extended_audio.mp3"
    extended_audio.export(output_path, format="mp3")
    return output_path


# Gradio interface: file upload + minutes slider -> extended MP3.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),
        # Gradio components take `value=` for the initial value;
        # `default=` is not a valid keyword.
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),
    ],
    # "file" is not a valid gr.Audio type; "filepath" matches the
    # string path returned by process_audio.
    outputs=gr.Audio(type="filepath"),
    title="Advanced Music Extender",
    description=(
        "Upload an audio file, and this app will extend the music by "
        "cutting and appending a segment based on advanced audio features. "
        "Choose additional minutes to extend the audio duration."
    ),
)

if __name__ == "__main__":
    iface.launch()