Commit · 606184e
Parent(s): ad0da04

Refactor code and remove unnecessary files

Files changed:
- .space/app-entrypoint.sh +0 -0
- Dockerfile +5 -8
- app.py +0 -4
- download_model.py +2 -4
- setup.py +0 -1
- src/chorus_detection/__init__.py +0 -10
- src/chorus_detection/audio/__init__.py +0 -0
- src/chorus_detection/audio/data_processing.py +0 -180
- src/chorus_detection/audio/processor.py +0 -409
- src/chorus_detection/config.py +0 -54
- src/chorus_detection/models/__init__.py +0 -0
- src/chorus_detection/models/crnn.py +0 -186
- src/chorus_detection/utils/__init__.py +0 -0
- src/chorus_detection/utils/cli.py +0 -107
- src/chorus_detection/utils/logging.py +0 -53
- src/chorus_detection/visualization/__init__.py +0 -0
- src/chorus_detection/visualization/plotter.py +0 -78
- src/download_model.py +0 -188
- src/streamlit_app.py +0 -536
- streamlit_app.py +23 -133
.space/app-entrypoint.sh
CHANGED
Binary files a/.space/app-entrypoint.sh and b/.space/app-entrypoint.sh differ
Dockerfile
CHANGED
@@ -27,17 +27,14 @@ RUN pip install -e .
 # Make the entry point script executable
 RUN chmod +x .space/app-entrypoint.sh || echo "Could not chmod app-entrypoint.sh"
 
-#
+# Verify chorus_detection package installation
 RUN cd /app && \
-    python -c "import chorus_detection; print(f'Successfully imported chorus_detection
+    python -c "import chorus_detection; print(f'Successfully imported chorus_detection')" || \
     echo "Warning: chorus_detection module not properly installed"
 
-# Ensure model exists
-RUN
-    echo "
-    python -c "import sys; print(f'Python path: {sys.path}')" && \
-    python -c "import os; print(f'Working directory: {os.getcwd()}')" && \
-    python -c "from download_model import ensure_model_exists; ensure_model_exists(revision='${MODEL_REVISION}')" || echo "Warning: Model download failed during build"
+# Ensure model exists
+RUN python -c "from download_model import ensure_model_exists; ensure_model_exists(revision='${MODEL_REVISION}')" || \
+    echo "Warning: Model download failed during build"
 
 # Expose port for Streamlit
 EXPOSE 7860
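For local debugging, the two build-time checks above can be reproduced outside Docker. A minimal sketch, assuming chorus_detection and download_model.py are importable and MODEL_REVISION is set in the environment (all values illustrative):

    # Sketch: mirror the Dockerfile's verification and model-download steps.
    import os

    import chorus_detection  # raises ImportError if the package is not installed
    from download_model import ensure_model_exists

    # Same call the RUN step makes; falls back to the script's default revision
    # when MODEL_REVISION is unset.
    model_path = ensure_model_exists(revision=os.environ.get("MODEL_REVISION"))
    print(f"Model available at: {model_path}")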
app.py
CHANGED
@@ -3,8 +3,6 @@
 
 """
 Main entry point for the Chorus Detection Streamlit app.
-This file is a simple wrapper that starts the Streamlit app
-without circular imports.
 """
 
 import os
@@ -29,9 +27,7 @@ if os.environ.get("SPACE_ID"):
 def main():
     """Main entry point for the Streamlit app."""
     logger.info("Starting Streamlit app...")
-    # Import the Streamlit app module directly
     import streamlit_app
-    # Run the Streamlit app
     streamlit_app.main()
 
 if __name__ == "__main__":
download_model.py
CHANGED
@@ -148,17 +148,15 @@ def ensure_model_exists(
     try:
         if HF_HUB_AVAILABLE:
             # Use huggingface_hub to download the model
-            logger.info(f"Downloading model from {repo_id}/{hf_model_filename}
+            logger.info(f"Downloading model from {repo_id}/{hf_model_filename}")
             downloaded_path = hf_hub_download(
                 repo_id=repo_id,
                 filename=hf_model_filename,
                 local_dir=model_dir,
                 local_dir_use_symlinks=False,
-                revision=revision
+                revision=revision
             )
 
-            logger.info(f"Downloaded to: {downloaded_path}")
-
             # Rename if necessary
             if os.path.basename(downloaded_path) != model_filename:
                 downloaded_path_obj = Path(downloaded_path)
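The revision argument pins hf_hub_download to one exact snapshot, so rebuilds fetch the same bytes even if the repository's main branch moves. A standalone sketch using the defaults this script falls back to (see the deleted src/download_model.py below; local path illustrative):

    from huggingface_hub import hf_hub_download

    # Defaults taken from ensure_model_exists; the pinned revision makes the
    # download reproducible across builds.
    path = hf_hub_download(
        repo_id="dennisvdang/chorus-detection",
        filename="chorus_detection_crnn.h5",
        revision="20e66eb3d0788373c3bdc5b28fa2f2587b0e475f3bbc47e8ab9ff0dbdbb2df32",
        local_dir="models/CRNN",
    )
    print(path)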
setup.py
CHANGED
@@ -8,7 +8,6 @@ setup(
     version="0.1.0",
     packages=find_packages(),
     install_requires=[
-        # These are already in requirements.txt so no need to specify versions
         "numpy",
        "scipy",
        "tqdm",
src/chorus_detection/__init__.py
DELETED
@@ -1,10 +0,0 @@
-"""Chorus Detection package for identifying choruses in music.
-
-This package contains modules for:
-- Audio processing and feature extraction
-- Machine learning models for chorus detection
-- Visualization tools for audio analysis
-- Utility functions
-"""
-
-__version__ = "0.1.0"
src/chorus_detection/audio/__init__.py
DELETED
File without changes
src/chorus_detection/audio/data_processing.py
DELETED
@@ -1,180 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Module for audio data processing including segmentation and positional encoding."""
-
-from typing import List, Optional, Tuple, Any
-
-import librosa
-import numpy as np
-
-from chorus_detection.audio.processor import AudioFeature
-from chorus_detection.config import SR, HOP_LENGTH, MAX_FRAMES, MAX_METERS, N_FEATURES
-from chorus_detection.utils.logging import logger
-
-
-def segment_data_meters(data: np.ndarray, meter_grid: np.ndarray) -> List[np.ndarray]:
-    """Divide song data into segments based on measure grid frames.
-
-    Args:
-        data: The song data to be segmented
-        meter_grid: The grid indicating the start of each measure
-
-    Returns:
-        A list of song data segments
-    """
-    # Create segments using vectorized operations
-    meter_segments = [data[s:e] for s, e in zip(meter_grid[:-1], meter_grid[1:])]
-
-    # Convert all segments to float32 for consistent processing
-    meter_segments = [segment.astype(np.float32) for segment in meter_segments]
-
-    return meter_segments
-
-
-def positional_encoding(position: int, d_model: int) -> np.ndarray:
-    """Generate a positional encoding for a given position and model dimension.
-
-    Args:
-        position: The position for which to generate the encoding
-        d_model: The dimension of the model
-
-    Returns:
-        The positional encoding
-    """
-    # Create position array
-    positions = np.arange(position)[:, np.newaxis]
-
-    # Calculate dimension-based scaling factors
-    dim_indices = np.arange(d_model)[np.newaxis, :]
-    angles = positions / np.power(10000, (2 * (dim_indices // 2)) / np.float32(d_model))
-
-    # Apply sine to even indices and cosine to odd indices
-    encodings = np.zeros((position, d_model), dtype=np.float32)
-    encodings[:, 0::2] = np.sin(angles[:, 0::2])
-    encodings[:, 1::2] = np.cos(angles[:, 1::2])
-
-    return encodings
-
-
-def apply_hierarchical_positional_encoding(segments: List[np.ndarray]) -> List[np.ndarray]:
-    """Apply positional encoding at the meter and frame levels to a list of segments.
-
-    Args:
-        segments: The list of segments to encode
-
-    Returns:
-        The list of segments with applied positional encoding
-    """
-    if not segments:
-        logger.warning("No segments to encode")
-        return []
-
-    n_features = segments[0].shape[1]
-
-    # Generate measure-level positional encodings
-    measure_level_encodings = positional_encoding(len(segments), n_features)
-
-    # Apply hierarchical encodings to each segment
-    encoded_segments = []
-    for i, segment in enumerate(segments):
-        # Generate frame-level positional encoding
-        frame_level_encoding = positional_encoding(len(segment), n_features)
-
-        # Combine frame-level and measure-level encodings
-        encoded_segment = segment + frame_level_encoding + measure_level_encodings[i]
-        encoded_segments.append(encoded_segment)
-
-    return encoded_segments
-
-
-def pad_song(encoded_segments: List[np.ndarray], max_frames: int = MAX_FRAMES,
-             max_meters: int = MAX_METERS, n_features: int = N_FEATURES) -> np.ndarray:
-    """Pad or truncate the encoded segments to have the specified dimensions.
-
-    Args:
-        encoded_segments: The encoded segments to pad or truncate
-        max_frames: The maximum number of frames per segment
-        max_meters: The maximum number of meters
-        n_features: The number of features per frame
-
-    Returns:
-        The padded or truncated song as a numpy array
-    """
-    if not encoded_segments:
-        logger.warning("No encoded segments to pad")
-        return np.zeros((max_meters, max_frames, n_features), dtype=np.float32)
-
-    # Pad or truncate each meter/segment to max_frames
-    padded_meters = []
-    for meter in encoded_segments:
-        # Truncate if longer than max_frames
-        truncated_meter = meter[:max_frames] if meter.shape[0] > max_frames else meter
-
-        # Pad if shorter than max_frames
-        if truncated_meter.shape[0] < max_frames:
-            padding = ((0, max_frames - truncated_meter.shape[0]), (0, 0))
-            padded_meter = np.pad(truncated_meter, padding, 'constant', constant_values=0)
-        else:
-            padded_meter = truncated_meter
-
-        padded_meters.append(padded_meter)
-
-    # Create padding meter (all zeros)
-    padding_meter = np.zeros((max_frames, n_features), dtype=np.float32)
-
-    # Truncate or pad to max_meters
-    if len(padded_meters) > max_meters:
-        padded_song = np.array(padded_meters[:max_meters])
-    else:
-        padded_song = np.array(padded_meters + [padding_meter] * (max_meters - len(padded_meters)))
-
-    return padded_song
-
-
-def process_audio(audio_path: str, trim_silence: bool = True, sr: int = SR,
-                  hop_length: int = HOP_LENGTH) -> Tuple[Optional[np.ndarray], Optional[AudioFeature]]:
-    """Process an audio file, extracting features and applying positional encoding.
-
-    Args:
-        audio_path: The path to the audio file
-        trim_silence: Whether to trim silence from the audio
-        sr: The sample rate to use when loading the audio
-        hop_length: The hop length to use for feature extraction
-
-    Returns:
-        A tuple containing the processed audio and its features
-    """
-    logger.info(f"Processing audio file: {audio_path}")
-
-    try:
-        # First optionally strip silence
-        if trim_silence:
-            from chorus_detection.audio.processor import strip_silence
-            strip_silence(audio_path)
-
-        # Create audio feature object and extract features
-        audio_features = AudioFeature(audio_path=audio_path, sr=sr, hop_length=hop_length)
-        audio_features.extract_features()
-        audio_features.create_meter_grid()
-
-        # Segment the audio data by meter grid
-        audio_segments = segment_data_meters(
-            audio_features.combined_features, audio_features.meter_grid)
-
-        # Apply positional encoding
-        encoded_audio_segments = apply_hierarchical_positional_encoding(audio_segments)
-
-        # Pad song to fixed dimensions and add batch dimension
-        processed_audio = np.expand_dims(pad_song(encoded_audio_segments), axis=0)
-
-        logger.info(f"Audio processing complete: {processed_audio.shape}")
-        return processed_audio, audio_features
-
-    except Exception as e:
-        logger.error(f"Error processing audio: {e}")
-
-        import traceback
-        logger.debug(traceback.format_exc())
-
-        return None, None
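The deleted positional_encoding implements the standard transformer sine/cosine scheme: sine on even feature indices, cosine on odd ones. A self-contained sanity check of the same formula:

    import numpy as np

    def positional_encoding(position: int, d_model: int) -> np.ndarray:
        # Same computation as the deleted helper.
        positions = np.arange(position)[:, np.newaxis]
        dim_indices = np.arange(d_model)[np.newaxis, :]
        angles = positions / np.power(10000, (2 * (dim_indices // 2)) / np.float32(d_model))
        enc = np.zeros((position, d_model), dtype=np.float32)
        enc[:, 0::2] = np.sin(angles[:, 0::2])
        enc[:, 1::2] = np.cos(angles[:, 1::2])
        return enc

    enc = positional_encoding(4, 6)
    print(enc.shape)  # (4, 6)
    print(enc[0])     # position 0: sin(0)=0 on even dims, cos(0)=1 on odd dims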
src/chorus_detection/audio/processor.py
DELETED
@@ -1,409 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Module for audio feature extraction and processing."""
-
-import os
-import subprocess
-import time
-from functools import reduce
-from pathlib import Path
-from typing import List, Tuple, Optional, Dict, Any, Union
-
-import librosa
-import numpy as np
-from pydub import AudioSegment
-from pydub.silence import detect_nonsilent
-from sklearn.preprocessing import StandardScaler
-
-from chorus_detection.config import SR, HOP_LENGTH, AUDIO_TEMP_PATH
-from chorus_detection.utils.logging import logger
-
-
-def extract_audio(url: str, output_path: str = str(AUDIO_TEMP_PATH)) -> Tuple[Optional[str], Optional[str]]:
-    """Download audio from YouTube URL and save as MP3 using yt-dlp.
-
-    Args:
-        url: YouTube URL of the audio file
-        output_path: Path to save the downloaded audio file
-
-    Returns:
-        Tuple containing path to the downloaded audio file and the video title, or None if download fails
-    """
-    try:
-        # Create output directory if it doesn't exist
-        os.makedirs(output_path, exist_ok=True)
-
-        # Create a unique filename using timestamp
-        timestamp = int(time.time())
-        output_file = os.path.join(output_path, f"audio_{timestamp}.mp3")
-
-        # Get the video title first
-        video_title = get_video_title(url) or f"Video_{timestamp}"
-
-        # Download the audio
-        success, error_msg = download_audio(url, output_file)
-
-        if not success:
-            handle_download_error(error_msg)
-            return None, None
-
-        # Check if file exists and is valid
-        if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
-            logger.info(f"Successfully downloaded: {video_title}")
-            return output_file, video_title
-        else:
-            logger.error("Download completed but file not found or empty")
-            return None, None
-
-    except Exception as e:
-        import traceback
-        error_details = traceback.format_exc()
-        logger.error(f"An error occurred during YouTube download: {e}")
-        logger.debug(f"Error details: {error_details}")
-
-        check_yt_dlp_installation()
-        return None, None
-
-
-def get_video_title(url: str) -> Optional[str]:
-    """Get the title of a YouTube video.
-
-    Args:
-        url: YouTube URL
-
-    Returns:
-        Video title if successful, None otherwise
-    """
-    try:
-        title_command = ['yt-dlp', '--get-title', '--no-warnings', url]
-        video_title = subprocess.check_output(title_command, universal_newlines=True).strip()
-        return video_title
-    except subprocess.CalledProcessError as e:
-        logger.warning(f"Could not retrieve video title: {str(e)}")
-        return None
-
-
-def download_audio(url: str, output_file: str) -> Tuple[bool, str]:
-    """Download audio from YouTube URL using yt-dlp.
-
-    Args:
-        url: YouTube URL
-        output_file: Output file path
-
-    Returns:
-        Tuple containing (success, error_message)
-    """
-    command = [
-        'yt-dlp',
-        '-f', 'bestaudio',
-        '--extract-audio',
-        '--audio-format', 'mp3',
-        '--audio-quality', '0',  # Best quality
-        '--output', output_file,
-        '--no-playlist',
-        '--verbose',
-        url
-    ]
-
-    process = subprocess.Popen(
-        command,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE,
-        universal_newlines=True
-    )
-    stdout, stderr = process.communicate()
-
-    if process.returncode != 0:
-        error_msg = f"Error downloading from YouTube (code {process.returncode}): {stderr}"
-        return False, error_msg
-
-    return True, ""
-
-
-def handle_download_error(error_msg: str) -> None:
-    """Handle common YouTube download errors with helpful messages.
-
-    Args:
-        error_msg: Error message from yt-dlp
-    """
-    logger.error(error_msg)
-
-    if "Sign in to confirm you're not a bot" in error_msg:
-        logger.error("YouTube is detecting automated access. Try using a local file instead.")
-    elif any(x in error_msg.lower() for x in ["unavailable video", "private video"]):
-        logger.error("The video appears to be private or unavailable. Please try another URL.")
-    elif "copyright" in error_msg.lower():
-        logger.error("The video may be blocked due to copyright restrictions.")
-    elif any(x in error_msg.lower() for x in ["rate limit", "429"]):
-        logger.error("YouTube rate limit reached. Please try again later.")
-
-
-def check_yt_dlp_installation() -> None:
-    """Check if yt-dlp is installed and provide guidance if it's not."""
-    try:
-        subprocess.run(['yt-dlp', '--version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    except FileNotFoundError:
-        logger.error("yt-dlp is not installed or not in PATH. Please install it with: pip install yt-dlp")
-
-
-def strip_silence(audio_path: str) -> None:
-    """Remove silent parts from an audio file.
-
-    Args:
-        audio_path: Path to the audio file
-    """
-    try:
-        sound = AudioSegment.from_file(audio_path)
-        nonsilent_ranges = detect_nonsilent(
-            sound, min_silence_len=500, silence_thresh=-50)
-
-        if not nonsilent_ranges:
-            logger.warning("No non-silent parts detected in the audio. Using original file.")
-            return
-
-        stripped = reduce(lambda acc, val: acc + sound[val[0]:val[1]],
-                          nonsilent_ranges, AudioSegment.empty())
-        stripped.export(audio_path, format='mp3')
-    except Exception as e:
-        logger.error(f"Error stripping silence: {e}")
-        logger.info("Proceeding with original audio file")
-
-
-class AudioFeature:
-    """Class for extracting and processing audio features."""
-
-    def __init__(self, audio_path: str, sr: int = SR, hop_length: int = HOP_LENGTH):
-        """Initialize the AudioFeature class.
-
-        Args:
-            audio_path: Path to the audio file
-            sr: Sample rate for audio processing
-            hop_length: Hop length for feature extraction
-        """
-        self.audio_path: str = audio_path
-        self.sr: int = sr
-        self.hop_length: int = hop_length
-        self.time_signature: int = 4
-
-        # Initialize all features as None
-        self.y: Optional[np.ndarray] = None
-        self.y_harm: Optional[np.ndarray] = None
-        self.y_perc: Optional[np.ndarray] = None
-        self.beats: Optional[np.ndarray] = None
-        self.chroma_acts: Optional[np.ndarray] = None
-        self.chromagram: Optional[np.ndarray] = None
-        self.combined_features: Optional[np.ndarray] = None
-        self.key: Optional[str] = None
-        self.mode: Optional[str] = None
-        self.mel_acts: Optional[np.ndarray] = None
-        self.melspectrogram: Optional[np.ndarray] = None
-        self.meter_grid: Optional[np.ndarray] = None
-        self.mfccs: Optional[np.ndarray] = None
-        self.mfcc_acts: Optional[np.ndarray] = None
-        self.n_frames: Optional[int] = None
-        self.onset_env: Optional[np.ndarray] = None
-        self.rms: Optional[np.ndarray] = None
-        self.spectrogram: Optional[np.ndarray] = None
-        self.tempo: Optional[float] = None
-        self.tempogram: Optional[np.ndarray] = None
-        self.tempogram_acts: Optional[np.ndarray] = None
-
-    def detect_key(self, chroma_vals: np.ndarray) -> Tuple[str, str]:
-        """Detect the key and mode (major or minor) of the audio segment.
-
-        Args:
-            chroma_vals: Chromagram values to analyze for key detection
-
-        Returns:
-            Tuple containing the detected key and mode
-        """
-        note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
-
-        # Key profiles (Krumhansl-Kessler profiles)
-        major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
-        minor_profile = np.array([6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17])
-
-        # Normalize profiles
-        major_profile /= np.linalg.norm(major_profile)
-        minor_profile /= np.linalg.norm(minor_profile)
-
-        # Calculate correlations for all possible rotations
-        major_correlations = [np.corrcoef(chroma_vals, np.roll(major_profile, i))[0, 1] for i in range(12)]
-        minor_correlations = [np.corrcoef(chroma_vals, np.roll(minor_profile, i))[0, 1] for i in range(12)]
-
-        # Find max correlation
-        max_major_idx = np.argmax(major_correlations)
-        max_minor_idx = np.argmax(minor_correlations)
-
-        # Determine mode
-        self.mode = 'major' if major_correlations[max_major_idx] > minor_correlations[max_minor_idx] else 'minor'
-        self.key = note_names[max_major_idx if self.mode == 'major' else max_minor_idx]
-
-        return self.key, self.mode
-
-    def calculate_ki_chroma(self, waveform: np.ndarray, sr: int, hop_length: int) -> np.ndarray:
-        """Calculate a normalized, key-invariant chromagram for the given audio waveform.
-
-        Args:
-            waveform: Audio waveform to analyze
-            sr: Sample rate of the waveform
-            hop_length: Hop length for feature extraction
-
-        Returns:
-            The key-invariant chromagram as a numpy array
-        """
-        # Calculate chromagram
-        chromagram = librosa.feature.chroma_cqt(
-            y=waveform, sr=sr, hop_length=hop_length, bins_per_octave=24)
-
-        # Normalize to [0, 1]
-        chromagram = (chromagram - chromagram.min()) / (chromagram.max() - chromagram.min() + 1e-8)
-
-        # Detect key
-        chroma_vals = np.sum(chromagram, axis=1)
-        key, mode = self.detect_key(chroma_vals)
-
-        # Make key-invariant
-        key_idx = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'].index(key)
-        shift_amount = -key_idx if mode == 'major' else -(key_idx + 3) % 12
-
-        return librosa.util.normalize(np.roll(chromagram, shift_amount, axis=0), axis=1)
-
-    def extract_features(self) -> None:
-        """Extract various audio features from the loaded audio."""
-        # Load audio
-        self.y, self.sr = librosa.load(self.audio_path, sr=self.sr)
-
-        # Harmonic-percussive source separation
-        self.y_harm, self.y_perc = librosa.effects.hpss(self.y)
-
-        # Extract spectrogram
-        self.spectrogram, _ = librosa.magphase(librosa.stft(self.y, hop_length=self.hop_length))
-
-        # RMS energy
-        self.rms = librosa.feature.rms(S=self.spectrogram, hop_length=self.hop_length).astype(np.float32)
-
-        # Mel spectrogram and activations
-        self.melspectrogram = librosa.feature.melspectrogram(
-            y=self.y, sr=self.sr, n_mels=128, hop_length=self.hop_length).astype(np.float32)
-        self.mel_acts = librosa.decompose.decompose(self.melspectrogram, n_components=3, sort=True)[1].astype(np.float32)
-
-        # Chromagram and activations
-        self.chromagram = self.calculate_ki_chroma(self.y_harm, self.sr, self.hop_length).astype(np.float32)
-        self.chroma_acts = librosa.decompose.decompose(self.chromagram, n_components=4, sort=True)[1].astype(np.float32)
-
-        # Onset detection and tempogram
-        self.onset_env = librosa.onset.onset_strength(y=self.y_perc, sr=self.sr, hop_length=self.hop_length)
-        self.tempogram = np.clip(librosa.feature.tempogram(
-            onset_envelope=self.onset_env, sr=self.sr, hop_length=self.hop_length), 0, None)
-        self.tempogram_acts = librosa.decompose.decompose(self.tempogram, n_components=3, sort=True)[1]
-
-        # MFCCs and activations
-        self.mfccs = librosa.feature.mfcc(y=self.y, sr=self.sr, n_mfcc=20, hop_length=self.hop_length)
-        self.mfccs += abs(np.min(self.mfccs) or 0)  # Handle negative values
-        self.mfcc_acts = librosa.decompose.decompose(self.mfccs, n_components=4, sort=True)[1].astype(np.float32)
-
-        # Combine features with weighted normalization
-        self._combine_features()
-
-    def _combine_features(self) -> None:
-        """Combine all extracted features with balanced weights."""
-        features = [self.rms, self.mel_acts, self.chroma_acts, self.tempogram_acts, self.mfcc_acts]
-        feature_names = ['rms', 'mel_acts', 'chroma_acts', 'tempogram_acts', 'mfcc_acts']
-
-        # Calculate dimension-based weights
-        dims = {name: feature.shape[0] for feature, name in zip(features, feature_names)}
-        total_inv_dim = sum(1 / dim for dim in dims.values())
-        weights = {name: 1 / (dims[name] * total_inv_dim) for name in feature_names}
-
-        # Normalize and weight each feature
-        std_weighted_features = [
-            StandardScaler().fit_transform(feature.T).T * weights[name]
-            for feature, name in zip(features, feature_names)
-        ]
-
-        # Combine features
-        self.combined_features = np.concatenate(std_weighted_features, axis=0).T.astype(np.float32)
-        self.n_frames = len(self.combined_features)
-
-    def create_meter_grid(self) -> np.ndarray:
-        """Create a grid based on the meter of the song using tempo and beats.
-
-        Returns:
-            Numpy array containing the meter grid frame positions
-        """
-        # Extract tempo and beat information
-        self.tempo, self.beats = librosa.beat.beat_track(
-            onset_envelope=self.onset_env, sr=self.sr, hop_length=self.hop_length)
-
-        # Adjust tempo if it's too slow or too fast
-        self.tempo = self._adjust_tempo(self.tempo)
-
-        # Create meter grid
-        self.meter_grid = self._create_meter_grid()
-        return self.meter_grid
-
-    def _adjust_tempo(self, tempo: float) -> float:
-        """Adjust tempo to a reasonable range.
-
-        Args:
-            tempo: Detected tempo
-
-        Returns:
-            Adjusted tempo
-        """
-        if tempo < 70:
-            return tempo * 2
-        elif tempo > 140:
-            return tempo / 2
-        return tempo
-
-    def _create_meter_grid(self) -> np.ndarray:
-        """Helper function to create a meter grid for the song.
-
-        Returns:
-            Numpy array containing the meter grid frame positions
-        """
-        # Calculate beat interval
-        seconds_per_beat = 60 / self.tempo
-        beat_interval = int(librosa.time_to_frames(seconds_per_beat, sr=self.sr, hop_length=self.hop_length))
-
-        # Find best matching start beat
-        if len(self.beats) >= 3:
-            best_match = max(
-                (1 - abs(np.mean(self.beats[i:i+3]) - beat_interval) / beat_interval, self.beats[i])
-                for i in range(len(self.beats) - 2)
-            )
-            anchor_frame = best_match[1] if best_match[0] > 0.95 else self.beats[0]
-        else:
-            anchor_frame = self.beats[0] if len(self.beats) > 0 else 0
-
-        first_beat_time = librosa.frames_to_time(anchor_frame, sr=self.sr, hop_length=self.hop_length)
-
-        # Calculate beats forward and backward
-        time_duration = librosa.frames_to_time(self.n_frames, sr=self.sr, hop_length=self.hop_length)
-        num_beats_forward = int((time_duration - first_beat_time) / seconds_per_beat)
-        num_beats_backward = int(first_beat_time / seconds_per_beat) + 1
-
-        # Create beat times
-        beat_times_forward = first_beat_time + np.arange(num_beats_forward) * seconds_per_beat
-        beat_times_backward = first_beat_time - np.arange(1, num_beats_backward) * seconds_per_beat
-
-        # Combine and create meter grid
-        beat_grid = np.concatenate((np.array([0.0]), beat_times_backward[::-1], beat_times_forward))
-        meter_indices = np.arange(0, len(beat_grid), self.time_signature)
-        meter_grid = beat_grid[meter_indices]
-
-        # Ensure grid starts at 0 and ends at frame duration
-        if meter_grid[0] != 0.0:
-            meter_grid = np.insert(meter_grid, 0, 0.0)
-
-        # Convert to frames
-        meter_grid = librosa.time_to_frames(meter_grid, sr=self.sr, hop_length=self.hop_length)
-
-        # Ensure grid ends at the last frame
-        if meter_grid[-1] != self.n_frames:
-            meter_grid = np.append(meter_grid, self.n_frames)
-
-        return meter_grid
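AudioFeature.detect_key above correlates the summed chroma energy against all twelve rotations of a Krumhansl-Kessler profile and keeps the best-scoring rotation. The idea in isolation, with a hypothetical C-major-heavy chroma vector:

    import numpy as np

    note_names = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    major_profile = np.array([6.35, 2.23, 3.48, 2.33, 4.38, 4.09,
                              2.52, 5.19, 2.39, 3.66, 2.29, 2.88])
    major_profile /= np.linalg.norm(major_profile)

    # Hypothetical per-pitch-class energy: strong C, E, G (a C-major triad).
    chroma_vals = np.array([5.0, 0.5, 2.0, 0.5, 3.0, 2.5, 0.5, 4.0, 0.5, 2.0, 0.5, 1.5])

    # The best-correlating rotation of the profile names the key.
    correlations = [np.corrcoef(chroma_vals, np.roll(major_profile, i))[0, 1]
                    for i in range(12)]
    print(note_names[int(np.argmax(correlations))])  # C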
src/chorus_detection/config.py
DELETED
@@ -1,54 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Configuration settings for the chorus detection system."""
-
-import os
-from pathlib import Path
-from typing import Dict, Any, Union, Optional
-
-# Audio processing settings
-SR: int = 12000  # Sample rate
-HOP_LENGTH: int = 128  # Hop length for signal processing
-MAX_FRAMES: int = 300  # Maximum frames per segment
-MAX_METERS: int = 201  # Maximum meters per song
-N_FEATURES: int = 15  # Number of features
-
-# Project paths
-PROJECT_ROOT: Path = Path(__file__).parent.parent.parent.resolve()
-MODEL_DIR: Path = PROJECT_ROOT / "models" / "CRNN"
-MODEL_PATH: Path = MODEL_DIR / "best_model_V3.h5"
-AUDIO_TEMP_PATH: Path = PROJECT_ROOT / "output" / "temp"
-LOG_DIR: Path = PROJECT_ROOT / "logs"
-
-# Alternative Docker paths
-DOCKER_MODEL_PATH: str = "/app/models/CRNN/best_model_V3.h5"
-DOCKER_TEMP_PATH: str = "/app/output/temp"
-
-
-def get_env_path(env_var: str, default_path: Path) -> Path:
-    """Get a path from environment variable or use the default.
-
-    Args:
-        env_var: Name of the environment variable
-        default_path: Default path to use if environment variable is not set
-
-    Returns:
-        Path object for the specified location
-    """
-    env_value = os.environ.get(env_var)
-    if env_value:
-        return Path(env_value).resolve()
-    return default_path
-
-
-# Override paths with environment variables if provided
-MODEL_PATH = get_env_path("CHORUS_MODEL_PATH", MODEL_PATH)
-AUDIO_TEMP_PATH = get_env_path("CHORUS_TEMP_PATH", AUDIO_TEMP_PATH)
-LOG_DIR = get_env_path("CHORUS_LOG_DIR", LOG_DIR)
-
-# Create necessary directories
-os.makedirs(MODEL_DIR, exist_ok=True)
-os.makedirs(AUDIO_TEMP_PATH, exist_ok=True)
-os.makedirs(LOG_DIR, exist_ok=True)
-os.makedirs(PROJECT_ROOT / "output", exist_ok=True)
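Every path in the deleted config could be overridden through get_env_path; a minimal sketch of the resolution order (environment value hypothetical):

    import os
    from pathlib import Path

    def get_env_path(env_var: str, default_path: Path) -> Path:
        # The environment wins; otherwise fall back to the default.
        env_value = os.environ.get(env_var)
        return Path(env_value).resolve() if env_value else default_path

    os.environ["CHORUS_MODEL_PATH"] = "/tmp/models/best_model_V3.h5"  # hypothetical
    print(get_env_path("CHORUS_MODEL_PATH", Path("models/CRNN/best_model_V3.h5")))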
src/chorus_detection/models/__init__.py
DELETED
File without changes
src/chorus_detection/models/crnn.py
DELETED
@@ -1,186 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Module for loading and managing the CRNN model for chorus detection."""
-
-import os
-from typing import Any, Optional, List, Tuple, Union
-
-import numpy as np
-import tensorflow as tf
-
-from chorus_detection.config import MODEL_PATH, DOCKER_MODEL_PATH
-from chorus_detection.utils.logging import logger
-
-
-def load_CRNN_model(model_path: str = str(MODEL_PATH)) -> tf.keras.Model:
-    """Load a CRNN model with custom loss and accuracy functions.
-
-    Args:
-        model_path: Path to the saved model
-
-    Returns:
-        Loaded Keras model
-
-    Raises:
-        RuntimeError: If the model cannot be loaded
-    """
-    try:
-        # Define custom objects required for model loading
-        custom_objects = {
-            'custom_binary_crossentropy': lambda y_true, y_pred: y_pred,
-            'custom_accuracy': lambda y_true, y_pred: y_pred
-        }
-
-        # Try to load the model with custom objects
-        logger.info(f"Loading model from: {model_path}")
-        model = tf.keras.models.load_model(
-            model_path, custom_objects=custom_objects, compile=False)
-
-        # Compile the model with default optimizer and loss for prediction only
-        model.compile(optimizer='adam', loss='binary_crossentropy')
-
-        return model
-    except Exception as e:
-        logger.error(f"Error loading model from {model_path}: {e}")
-
-        # Try Docker container path as fallback
-        if model_path != DOCKER_MODEL_PATH and os.path.exists(DOCKER_MODEL_PATH):
-            logger.info(f"Trying Docker path: {DOCKER_MODEL_PATH}")
-            return load_CRNN_model(DOCKER_MODEL_PATH)
-
-        raise RuntimeError(f"Failed to load model: {e}")
-
-
-def smooth_predictions(predictions: np.ndarray) -> np.ndarray:
-    """Smooth predictions by correcting isolated mispredictions and removing short sequences.
-
-    Args:
-        predictions: Array of binary predictions
-
-    Returns:
-        Smoothed array of binary predictions
-    """
-    # Convert to numpy array if not already
-    data = np.array(predictions, copy=True) if not isinstance(predictions, np.ndarray) else predictions.copy()
-
-    # First pass: Correct isolated 0's (handle 0's surrounded by 1's)
-    for i in range(1, len(data) - 1):
-        if data[i] == 0 and data[i - 1] == 1 and data[i + 1] == 1:
-            data[i] = 1
-
-    # Second pass: Correct isolated 1's (handle 1's surrounded by 0's)
-    corrected_data = data.copy()
-    for i in range(1, len(data) - 1):
-        if data[i] == 1 and data[i - 1] == 0 and data[i + 1] == 0:
-            corrected_data[i] = 0
-
-    # Third pass: Remove short sequences of 1s (less than 5 consecutive 1's)
-    smoothed_data = corrected_data.copy()
-    sequence_start = None
-
-    for i in range(len(corrected_data)):
-        if corrected_data[i] == 1:
-            if sequence_start is None:
-                sequence_start = i
-        else:
-            if sequence_start is not None:
-                sequence_length = i - sequence_start
-                if sequence_length < 5:
-                    smoothed_data[sequence_start:i] = 0
-                sequence_start = None
-
-    # Handle the case where the sequence extends to the end
-    if sequence_start is not None:
-        sequence_length = len(corrected_data) - sequence_start
-        if sequence_length < 5:
-            smoothed_data[sequence_start:] = 0
-
-    return smoothed_data
-
-
-def make_predictions(model: tf.keras.Model, processed_audio: np.ndarray,
-                     audio_features: Any, url: Optional[str] = None,
-                     video_name: Optional[str] = None) -> np.ndarray:
-    """Generate predictions from the model and process them.
-
-    Args:
-        model: The loaded model for making predictions
-        processed_audio: The audio data that has been processed for prediction
-        audio_features: Audio features object containing necessary metadata
-        url: YouTube URL of the audio file (optional)
-        video_name: Name of the video (optional)
-
-    Returns:
-        The smoothed binary predictions
-    """
-    import librosa
-
-    logger.info("Generating predictions...")
-
-    # Make predictions
-    predictions = model.predict(processed_audio)[0]
-
-    # Convert to binary predictions and handle potential size mismatch
-    meter_grid_length = len(audio_features.meter_grid) - 1
-    if len(predictions) > meter_grid_length:
-        predictions = predictions[:meter_grid_length]
-
-    binary_predictions = np.round(predictions).flatten()
-
-    # Apply smoothing to improve prediction quality
-    smoothed_predictions = smooth_predictions(binary_predictions)
-
-    # Get times for identified chorus sections
-    meter_grid_times = librosa.frames_to_time(
-        audio_features.meter_grid,
-        sr=audio_features.sr,
-        hop_length=audio_features.hop_length
-    )
-
-    # Identify where choruses start
-    chorus_start_times = [
-        meter_grid_times[i] for i in range(len(smoothed_predictions))
-        if smoothed_predictions[i] == 1 and (i == 0 or smoothed_predictions[i - 1] == 0)
-    ]
-
-    # Print results if URL and video name are provided (CLI mode)
-    if url and video_name:
-        _print_chorus_results(url, video_name, chorus_start_times)
-
-    return smoothed_predictions
-
-
-def _print_chorus_results(url: str, video_name: str, chorus_start_times: List[float]) -> None:
-    """Print formatted results showing identified choruses with links.
-
-    Args:
-        url: YouTube URL of the analyzed video
-        video_name: Name of the video
-        chorus_start_times: List of start times (in seconds) for identified choruses
-    """
-    # Create YouTube links with time stamps
-    youtube_links = [
-        f"\033]8;;{url}&t={int(start_time)}s\033\\{url}&t={int(start_time)}s\033]8;;\033\\"
-        for start_time in chorus_start_times
-    ]
-
-    # Format the output
-    link_lengths = [len(link) for link in youtube_links]
-    max_length = max(link_lengths + [len(video_name), len(f"Number of choruses identified: {len(chorus_start_times)}")]) if link_lengths else 50
-    header_footer = "=" * (max_length + 4)
-
-    # Print the results
-    print("\n\n")
-    print(header_footer)
-    print(f"{video_name.center(max_length + 2)}")
-    print(f"Number of choruses identified: {len(chorus_start_times)}".center(max_length + 4))
-    print(header_footer)
-
-    if chorus_start_times:
-        for link in youtube_links:
-            print(link)
-    else:
-        print("No choruses identified.")
-
-    print(header_footer)
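smooth_predictions above makes three passes: fill isolated 0s, clear isolated 1s, then drop any run of 1s shorter than five. A compact runnable equivalent with a worked example:

    import numpy as np

    def smooth_predictions(predictions: np.ndarray) -> np.ndarray:
        # Compact restatement of the deleted helper's three passes.
        data = predictions.copy()
        for i in range(1, len(data) - 1):          # pass 1: fill isolated 0s
            if data[i] == 0 and data[i - 1] == 1 and data[i + 1] == 1:
                data[i] = 1
        out = data.copy()
        for i in range(1, len(data) - 1):          # pass 2: clear isolated 1s
            if data[i] == 1 and data[i - 1] == 0 and data[i + 1] == 0:
                out[i] = 0
        smoothed, start = out.copy(), None         # pass 3: drop runs shorter than 5
        for i, v in enumerate(out):
            if v == 1 and start is None:
                start = i
            elif v == 0 and start is not None:
                if i - start < 5:
                    smoothed[start:i] = 0
                start = None
        if start is not None and len(out) - start < 5:
            smoothed[start:] = 0
        return smoothed

    raw = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0])
    print(smooth_predictions(raw))  # [1 1 1 1 1 1 0 0 0 0 0 0]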
src/chorus_detection/utils/__init__.py
DELETED
File without changes
src/chorus_detection/utils/cli.py
DELETED
@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Command-line interface utilities for the chorus detection system."""
-
-import argparse
-import os
-import sys
-from pathlib import Path
-from typing import Dict, Any, Optional, Tuple
-
-from chorus_detection.config import MODEL_PATH
-from chorus_detection.utils.logging import logger
-
-
-def parse_arguments() -> argparse.Namespace:
-    """Parse command-line arguments.
-
-    Returns:
-        Parsed command-line arguments
-    """
-    parser = argparse.ArgumentParser(
-        description="Chorus Finder - Detect choruses in songs from YouTube URLs or local audio files")
-
-    input_group = parser.add_mutually_exclusive_group()
-    input_group.add_argument("--url", type=str,
-                             help="YouTube URL of a song")
-    input_group.add_argument("--file", type=str,
-                             help="Path to a local audio file")
-
-    parser.add_argument("--model_path", type=str, default=str(MODEL_PATH),
-                        help=f"Path to the pretrained model (default: {MODEL_PATH})")
-    parser.add_argument("--verbose", action="store_true",
-                        help="Verbose output", default=True)
-    parser.add_argument("--plot", action="store_true",
-                        help="Display plot of the audio waveform", default=True)
-    parser.add_argument("--no-plot", dest="plot", action="store_false",
-                        help="Disable plot display (useful for headless environments)")
-
-    return parser.parse_args()
-
-
-def get_input_source(args: argparse.Namespace) -> Optional[str]:
-    """Get input source from arguments or user input.
-
-    Args:
-        args: Parsed command-line arguments
-
-    Returns:
-        Input source (URL or file path)
-    """
-    input_source = args.url or args.file
-    if not input_source:
-        print("\nChorus Detection Tool")
-        print("====================")
-        print("\nNote: YouTube download functionality may be temporarily unavailable")
-        print("due to YouTube's restrictions. If download fails, please use a local audio file.\n")
-        print("Choose input method:")
-        print("1. YouTube URL")
-        print("2. Local audio file")
-        choice = input("Enter choice (1 or 2): ")
-
-        if choice == "1":
-            input_source = input("Please enter the YouTube URL of the song: ")
-        elif choice == "2":
-            input_source = input("Please enter the path to the audio file: ")
-        else:
-            logger.error("Invalid choice")
-            sys.exit(1)
-
-    return input_source
-
-
-def is_youtube_url(input_source: str) -> bool:
-    """Check if the input source is a YouTube URL.
-
-    Args:
-        input_source: Input source to check
-
-    Returns:
-        True if the input source is a YouTube URL, False otherwise
-    """
-    return input_source.startswith(('http://', 'https://'))
-
-
-def validate_input_file(file_path: str) -> bool:
-    """Validate that the input file exists and is readable.
-
-    Args:
-        file_path: Path to the input file
-
-    Returns:
-        True if the file is valid, False otherwise
-    """
-    if not os.path.exists(file_path):
-        logger.error(f"Error: File not found at {file_path}")
-        return False
-
-    if not os.path.isfile(file_path):
-        logger.error(f"Error: {file_path} is not a file")
-        return False
-
-    if not os.access(file_path, os.R_OK):
-        logger.error(f"Error: No permission to read {file_path}")
-        return False
-
-    return True
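The deleted CLI routed input with a plain prefix check (is_youtube_url) plus a three-part readability check (validate_input_file); the routing in isolation, with a hypothetical source:

    import os

    source = "https://youtube.com/watch?v=abc123"  # hypothetical input

    if source.startswith(('http://', 'https://')):     # is_youtube_url
        print("treat as YouTube URL")
    else:                                              # validate_input_file
        ok = (os.path.exists(source) and os.path.isfile(source)
              and os.access(source, os.R_OK))
        print("local file readable:", ok)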
src/chorus_detection/utils/logging.py
DELETED
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Logging configuration for the chorus detection system."""
-
-import logging
-import os
-import sys
-from typing import Optional
-
-from chorus_detection.config import PROJECT_ROOT
-
-# Create logs directory
-LOGS_DIR = PROJECT_ROOT / "logs"
-os.makedirs(LOGS_DIR, exist_ok=True)
-
-
-def setup_logger(name: str = "chorus_detection", level: int = logging.INFO,
-                 log_file: Optional[str] = None) -> logging.Logger:
-    """Configure and return a logger with the specified name and level.
-
-    Args:
-        name: Name of the logger
-        level: Logging level (default: INFO)
-        log_file: Path to the log file (default: None)
-
-    Returns:
-        Configured logger instance
-    """
-    logger = logging.getLogger(name)
-    logger.setLevel(level)
-
-    # Create formatter
-    formatter = logging.Formatter(
-        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-    )
-
-    # Create console handler
-    console_handler = logging.StreamHandler(sys.stdout)
-    console_handler.setFormatter(formatter)
-    logger.addHandler(console_handler)
-
-    # Create file handler if log_file is specified
-    if log_file:
-        file_handler = logging.FileHandler(log_file)
-        file_handler.setFormatter(formatter)
-        logger.addHandler(file_handler)
-
-    return logger
-
-
-# Create default logger
-logger = setup_logger()
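setup_logger above is a thin wrapper over the standard library; its default (console-only) effect, reproduced directly:

    import logging
    import sys

    logger = logging.getLogger("chorus_detection")
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    logger.info("equivalent of setup_logger() without a log_file")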
src/chorus_detection/visualization/__init__.py
DELETED
File without changes
src/chorus_detection/visualization/plotter.py
DELETED
@@ -1,78 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""Module for visualizing audio data and chorus predictions."""
-
-from typing import List
-
-import librosa
-import librosa.display
-import matplotlib.pyplot as plt
-import numpy as np
-import os
-
-from chorus_detection.audio.processor import AudioFeature
-
-
-def plot_meter_lines(ax: plt.Axes, meter_grid_times: np.ndarray) -> None:
-    """Draw meter grid lines on the plot.
-
-    Args:
-        ax: The matplotlib axes object to draw on
-        meter_grid_times: Array of times at which to draw the meter lines
-    """
-    for time in meter_grid_times:
-        ax.axvline(x=time, color='grey', linestyle='--',
-                   linewidth=1, alpha=0.6)
-
-
-def plot_predictions(audio_features: AudioFeature, binary_predictions: np.ndarray) -> None:
-    """Plot the audio waveform and overlay the predicted chorus locations.
-
-    Args:
-        audio_features: An object containing audio features and components
-        binary_predictions: Array of binary predictions indicating chorus locations
-    """
-    meter_grid_times = librosa.frames_to_time(
-        audio_features.meter_grid, sr=audio_features.sr, hop_length=audio_features.hop_length)
-    fig, ax = plt.subplots(figsize=(12.5, 3), dpi=96)
-
-    # Display harmonic and percussive components
-    librosa.display.waveshow(audio_features.y_harm, sr=audio_features.sr,
-                             alpha=0.8, ax=ax, color='deepskyblue')
-    librosa.display.waveshow(audio_features.y_perc, sr=audio_features.sr,
-                             alpha=0.7, ax=ax, color='plum')
-    plot_meter_lines(ax, meter_grid_times)
-
-    # Highlight chorus sections
-    for i, prediction in enumerate(binary_predictions):
-        start_time = meter_grid_times[i]
-        end_time = meter_grid_times[i + 1] if i < len(
-            meter_grid_times) - 1 else len(audio_features.y) / audio_features.sr
-        if prediction == 1:
-            ax.axvspan(start_time, end_time, color='green', alpha=0.3,
-                       label='Predicted Chorus' if i == 0 else None)
-
-    # Set plot limits and labels
-    ax.set_xlim([0, len(audio_features.y) / audio_features.sr])
-    ax.set_ylabel('Amplitude')
-    audio_file_name = os.path.basename(audio_features.audio_path)
-    ax.set_title(
-        f'Chorus Predictions for {os.path.splitext(audio_file_name)[0]}')
-
-    # Add legend
-    chorus_patch = plt.Rectangle((0, 0), 1, 1, fc='green', alpha=0.3)
-    handles, labels = ax.get_legend_handles_labels()
-    handles.append(chorus_patch)
-    labels.append('Chorus')
-    ax.legend(handles=handles, labels=labels)
-
-    # Set x-tick labels in minutes:seconds format
-    duration = len(audio_features.y) / audio_features.sr
-    xticks = np.arange(0, duration, 10)
-    xlabels = [f"{int(tick // 60)}:{int(tick % 60):02d}" for tick in xticks]
-    ax.set_xticks(xticks)
-    ax.set_xticklabels(xlabels)
-
-    plt.tight_layout()
-    plt.show(block=False)
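The plotting helpers above reduce to standard matplotlib calls; a standalone sketch of the meter lines and chorus highlighting with hypothetical times:

    import matplotlib.pyplot as plt
    import numpy as np

    fig, ax = plt.subplots(figsize=(12.5, 3), dpi=96)
    for time in np.arange(0, 30, 2.0):      # meter grid every 2 s (hypothetical)
        ax.axvline(x=time, color='grey', linestyle='--', linewidth=1, alpha=0.6)
    ax.axvspan(10, 18, color='green', alpha=0.3, label='Predicted Chorus')
    ax.set_xlim([0, 30])
    ax.set_ylabel('Amplitude')
    ax.legend()
    plt.tight_layout()
    plt.show(block=False)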
src/download_model.py
DELETED
@@ -1,188 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Script to download the chorus detection model from HuggingFace.

This script checks if the model file exists locally, and if not, downloads it
from the specified HuggingFace repository.
"""

import os
import sys
from pathlib import Path
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("model-downloader")

# Debug environment info
logger.info(f"Current working directory: {os.getcwd()}")
logger.info(f"Python path: {sys.path}")
logger.info(f"MODEL_REVISION: {os.environ.get('MODEL_REVISION')}")
logger.info(f"MODEL_HF_REPO: {os.environ.get('MODEL_HF_REPO')}")
logger.info(f"HF_MODEL_FILENAME: {os.environ.get('HF_MODEL_FILENAME')}")

# Use huggingface_hub for better integration with HF ecosystem
try:
    from huggingface_hub import hf_hub_download
    HF_HUB_AVAILABLE = True
    logger.info("huggingface_hub is available")
except ImportError:
    HF_HUB_AVAILABLE = False
    logger.warning("huggingface_hub is not available, falling back to direct download")
    import requests
    from tqdm import tqdm

def download_file_with_progress(url: str, destination: Path) -> None:
    """Download a file with a progress bar.

    Args:
        url: URL to download from
        destination: Path to save the file to
    """
    # Create parent directories if they don't exist
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Stream the download with progress bar
    response = requests.get(url, stream=True)
    response.raise_for_status()

    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 Kibibyte

    logger.info(f"Downloading model from {url}")
    logger.info(f"File size: {total_size / (1024*1024):.1f} MB")

    with open(destination, 'wb') as file, tqdm(
        desc=destination.name,
        total=total_size,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    ) as bar:
        for data in response.iter_content(block_size):
            size = file.write(data)
            bar.update(size)

def ensure_model_exists(
    model_filename: str = "best_model_V3.h5",
    repo_id: str = None,
    model_dir: Path = None,
    hf_model_filename: str = None,
    revision: str = None
) -> Path:
    """Ensure the model file exists, downloading it if necessary.

    Args:
        model_filename: Local filename for the model
        repo_id: HuggingFace repository ID
        model_dir: Directory to save the model to
        hf_model_filename: Filename of the model in the HuggingFace repo
        revision: Specific version of the model to use (SHA-256 hash)

    Returns:
        Path to the model file
    """
    # Get parameters from environment variables if not provided
    if repo_id is None:
        repo_id = os.environ.get("MODEL_HF_REPO", "dennisvdang/chorus-detection")

    if hf_model_filename is None:
        hf_model_filename = os.environ.get("HF_MODEL_FILENAME", "chorus_detection_crnn.h5")

    if revision is None:
        revision = os.environ.get("MODEL_REVISION", "20e66eb3d0788373c3bdc5b28fa2f2587b0e475f3bbc47e8ab9ff0dbdbb2df32")

    # Handle model directory paths for different environments
    if model_dir is None:
        # Check if we're in HF Spaces
        if os.environ.get("SPACE_ID"):
            # Try several possible locations
            possible_dirs = [
                Path("models/CRNN"),
                Path("/home/user/app/models/CRNN"),
                Path("/app/models/CRNN"),
                Path(os.getcwd()) / "models" / "CRNN"
            ]

            for directory in possible_dirs:
                if directory.exists() or directory.parent.exists():
                    model_dir = directory
                    break

            # If none exist, use the first option and create it
            if model_dir is None:
                model_dir = possible_dirs[0]
        else:
            model_dir = Path("models/CRNN")

    # Make sure model_dir is a Path object
    if isinstance(model_dir, str):
        model_dir = Path(model_dir)

    logger.info(f"Using model directory: {model_dir}")

    model_path = model_dir / model_filename

    # Log environment info when running in HF Space
    if os.environ.get("SPACE_ID"):
        logger.info(f"Running in Hugging Face Space: {os.environ.get('SPACE_ID')}")
        logger.info(f"Using model repo: {repo_id}")
        logger.info(f"Using model file: {hf_model_filename}")
        logger.info(f"Using revision: {revision}")

    # Check if the model already exists
    if model_path.exists():
        logger.info(f"Model already exists at {model_path}")
        return model_path

    # Create model directory if it doesn't exist
    model_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Model not found at {model_path}. Downloading...")

    try:
        if HF_HUB_AVAILABLE:
            # Use huggingface_hub to download the model
            logger.info(f"Downloading model from {repo_id}/{hf_model_filename} (revision: {revision}) using huggingface_hub")
            downloaded_path = hf_hub_download(
                repo_id=repo_id,
                filename=hf_model_filename,
                local_dir=model_dir,
                local_dir_use_symlinks=False,
                revision=revision  # Specify the exact revision to use
            )

            logger.info(f"Downloaded to: {downloaded_path}")

            # Rename if necessary
            if os.path.basename(downloaded_path) != model_filename:
                downloaded_path_obj = Path(downloaded_path)
                model_path.parent.mkdir(parents=True, exist_ok=True)
                if model_path.exists():
                    model_path.unlink()
                downloaded_path_obj.rename(model_path)
                logger.info(f"Renamed {downloaded_path} to {model_path}")
        else:
            # Fallback to direct download if huggingface_hub is not available
            huggingface_url = f"https://huggingface.co/{repo_id}/resolve/{revision}/{hf_model_filename}"
            download_file_with_progress(huggingface_url, model_path)

        logger.info(f"Successfully downloaded model to {model_path}")
        return model_path
    except Exception as e:
        logger.error(f"Failed to download model: {e}", exc_info=True)

        # Handle error more gracefully in production environment
        if os.environ.get("SPACE_ID"):
            logger.warning("Continuing despite model download failure")
            return model_path
        else:
            sys.exit(1)

if __name__ == "__main__":
    ensure_model_exists()
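For reference, a minimal usage sketch of the downloader removed above. This assumes the surviving root-level download_model.py keeps the same entry point (the new streamlit_app.py below still imports it); the argument values are simply the defaults from the deleted file.

from download_model import ensure_model_exists

# Pin the model to an exact revision so rebuilds stay reproducible.
model_path = ensure_model_exists(
    model_filename="best_model_V3.h5",
    repo_id="dennisvdang/chorus-detection",
    revision="20e66eb3d0788373c3bdc5b28fa2f2587b0e475f3bbc47e8ab9ff0dbdbb2df32",
)
print(model_path)  # e.g. models/CRNN/best_model_V3.h5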
src/streamlit_app.py
DELETED
@@ -1,536 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Streamlit web app for chorus detection in audio files.

This module provides a web-based interface for the chorus detection system,
allowing users to upload audio files or provide YouTube URLs for analysis.
"""

import os
import sys
import logging

# Configure logging
logger = logging.getLogger("streamlit-app")

# Configure TensorFlow logging before importing TensorFlow
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow logs

# Ensure proper import paths
current_dir = os.path.dirname(os.path.abspath(__file__))
root_dir = os.path.dirname(current_dir)
if current_dir not in sys.path:
    sys.path.insert(0, current_dir)
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)

# Import model downloader to ensure model is available
try:
    if os.path.exists(os.path.join(os.getcwd(), "download_model.py")):
        # If in the root directory
        from download_model import ensure_model_exists
    else:
        # If in the src directory
        sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        from download_model import ensure_model_exists
except ImportError as e:
    logger.error(f"Error importing ensure_model_exists: {e}")
    try:
        # Try alternative import
        from src.download_model import ensure_model_exists
    except ImportError as e2:
        logger.error(f"Alternative import failed: {e2}")
        raise

import base64
import tempfile
import warnings
from typing import Optional, Tuple, List
import time
import io

import matplotlib.pyplot as plt
import streamlit as st
import tensorflow as tf
import librosa
import soundfile as sf
import numpy as np
from pydub import AudioSegment

# Suppress warnings
warnings.filterwarnings("ignore")  # Suppress all warnings
tf.get_logger().setLevel('ERROR')  # Suppress TensorFlow ERROR logs

# Debug import paths
logger.info(f"Python path: {sys.path}")
logger.info(f"Current working directory: {os.getcwd()}")

# First try direct import with src in path
try:
    # Add src directory to Python path if not already there
    src_path = os.path.dirname(current_dir)
    if src_path not in sys.path:
        sys.path.insert(0, src_path)

    from chorus_detection.audio.data_processing import process_audio
    from chorus_detection.audio.processor import extract_audio
    from chorus_detection.models.crnn import load_CRNN_model, make_predictions
    from chorus_detection.utils.cli import is_youtube_url
    from chorus_detection.utils.logging import logger

    logger.info("Successfully imported chorus_detection modules")
except ImportError as e:
    logger.error(f"Error importing chorus_detection modules: {e}")
    logger.info("Trying alternative imports...")

    # Try with manual path adjustment
    try:
        # Adjust import paths - try different directories
        possible_paths = [
            os.path.join(os.getcwd(), "src"),
            os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
            os.path.dirname(os.getcwd())
        ]

        for path in possible_paths:
            if path not in sys.path and os.path.exists(path):
                sys.path.insert(0, path)
                logger.info(f"Added path to sys.path: {path}")

        # Try importing directly from chorus_detection module path
        sys.path.insert(0, os.path.join(os.getcwd(), "src", "chorus_detection"))

        from chorus_detection.audio.data_processing import process_audio
        from chorus_detection.audio.processor import extract_audio
        from chorus_detection.models.crnn import load_CRNN_model, make_predictions
        from chorus_detection.utils.cli import is_youtube_url
        from chorus_detection.utils.logging import logger

        logger.info("Successfully imported chorus_detection modules after path adjustment")
    except ImportError as e2:
        logger.error(f"Alternative imports also failed: {e2}")
        raise

# Define the MODEL_PATH directly
MODEL_PATH = os.path.join(os.getcwd(), "models", "CRNN", "best_model_V3.h5")
if not os.path.exists(MODEL_PATH):
    MODEL_PATH = ensure_model_exists()

# Define color scheme
THEME_COLORS = {
    'background': '#121212',
    'card_bg': '#181818',
    'primary': '#1DB954',
    'secondary': '#1ED760',
    'text': '#FFFFFF',
    'subtext': '#B3B3B3',
    'highlight': '#1DB954',
    'border': '#333333',
}


def get_binary_file_downloader_html(bin_file: str, file_label: str = 'File') -> str:
    """Generate HTML for file download link.

    Args:
        bin_file: Path to the binary file
        file_label: Label for the download link

    Returns:
        HTML string for the download link
    """
    with open(bin_file, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    return f'<a href="data:application/octet-stream;base64,{b64}" download="{os.path.basename(bin_file)}">{file_label}</a>'


def set_custom_theme() -> None:
    """Apply custom Spotify-inspired theme to Streamlit UI."""
    custom_theme = f"""
    <style>
    .stApp {{
        background-color: {THEME_COLORS['background']};
        color: {THEME_COLORS['text']};
    }}
    .css-18e3th9 {{
        padding-top: 2rem;
        padding-bottom: 10rem;
        padding-left: 5rem;
        padding-right: 5rem;
    }}
    h1, h2, h3, h4, h5, h6 {{
        color: {THEME_COLORS['text']} !important;
        font-weight: 700 !important;
    }}
    .stSidebar .sidebar-content {{
        background-color: {THEME_COLORS['card_bg']};
    }}
    .stButton>button {{
        background-color: {THEME_COLORS['primary']};
        color: white;
        border-radius: 500px;
        padding: 8px 32px;
        font-weight: 600;
        border: none;
        transition: all 0.3s ease;
    }}
    .stButton>button:hover {{
        background-color: {THEME_COLORS['secondary']};
        transform: scale(1.04);
    }}
    </style>
    """
    st.markdown(custom_theme, unsafe_allow_html=True)


def process_youtube(url: str) -> Tuple[Optional[str], Optional[str]]:
    """Process a YouTube URL and extract audio.

    Args:
        url: YouTube URL

    Returns:
        Tuple of (audio_path, video_name)
    """
    try:
        with st.spinner('Downloading audio from YouTube...'):
            audio_path, video_name = extract_audio(url)
            return audio_path, video_name
    except Exception as e:
        st.error(f"Error processing YouTube URL: {e}")
        logger.error(f"Error processing YouTube URL: {e}", exc_info=True)
        return None, None


def process_uploaded_file(uploaded_file) -> Tuple[Optional[str], Optional[str]]:
    """Process an uploaded audio file.

    Args:
        uploaded_file: Streamlit UploadedFile object

    Returns:
        Tuple of (audio_path, file_name)
    """
    try:
        with st.spinner('Processing uploaded file...'):
            # Save the uploaded file to a temporary location
            temp_dir = tempfile.mkdtemp()
            file_name = uploaded_file.name
            temp_path = os.path.join(temp_dir, file_name)

            with open(temp_path, 'wb') as f:
                f.write(uploaded_file.getbuffer())

            return temp_path, file_name.split('.')[0]
    except Exception as e:
        st.error(f"Error processing uploaded file: {e}")
        logger.error(f"Error processing uploaded file: {e}", exc_info=True)
        return None, None


def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
                            meter_grid_times: np.ndarray) -> List[Tuple[float, float, np.ndarray]]:
    """Extract chorus segments from predictions.

    Args:
        y: Audio data
        sr: Sample rate
        smoothed_predictions: Smoothed model predictions
        meter_grid_times: Time grid for predictions

    Returns:
        List of (start_time, end_time, audio_segment) tuples
    """
    # Define threshold for chorus detection (probability > 0.5)
    threshold = 0.5

    # Find the segments where the predictions are above the threshold
    chorus_mask = smoothed_predictions > threshold

    # Group consecutive True values to identify segments
    segments = []
    current_segment = None

    for i, is_chorus in enumerate(chorus_mask):
        time = meter_grid_times[i]

        if is_chorus and current_segment is None:
            # Start a new segment
            current_segment = (time, None, None)
        elif not is_chorus and current_segment is not None:
            # End the current segment
            start_time = current_segment[0]
            current_segment = (start_time, time, None)
            segments.append(current_segment)
            current_segment = None

    # Handle the case where the last segment extends to the end of the song
    if current_segment is not None:
        start_time = current_segment[0]
        segments.append((start_time, meter_grid_times[-1], None))

    # Extract the actual audio for each segment
    segments_with_audio = []
    for start_time, end_time, _ in segments:
        # Convert times to sample indices
        start_idx = int(start_time * sr)
        end_idx = int(end_time * sr)

        # Extract the audio segment
        segment_audio = y[start_idx:end_idx]

        segments_with_audio.append((start_time, end_time, segment_audio))

    return segments_with_audio


def create_chorus_compilation(segments: List[Tuple[float, float, np.ndarray]],
                              sr: int, fade_duration: float = 0.3) -> Tuple[np.ndarray, str]:
    """Create a compilation of chorus segments.

    Args:
        segments: List of (start_time, end_time, audio_data) tuples
        sr: Sample rate
        fade_duration: Duration of fade in/out in seconds

    Returns:
        Tuple of (compilation_audio, description)
    """
    if not segments:
        return np.array([]), "No chorus segments found"

    # Calculate the number of samples for fading
    fade_samples = int(fade_duration * sr)

    # Prepare a list to store the processed segments
    processed_segments = []

    # Description of segments
    segment_descriptions = []

    # Process each segment
    for i, (start_time, end_time, audio) in enumerate(segments):
        # Apply fade in and fade out
        segment_length = len(audio)

        if segment_length <= 2 * fade_samples:
            # Segment is too short for fading, skip it
            continue

        # Create a linear fade in and fade out
        fade_in = np.linspace(0, 1, fade_samples)
        fade_out = np.linspace(1, 0, fade_samples)

        # Apply the fades
        audio_faded = audio.copy()
        audio_faded[:fade_samples] *= fade_in
        audio_faded[-fade_samples:] *= fade_out

        processed_segments.append(audio_faded)

        # Format the times for the description
        start_fmt = format_time(start_time)
        end_fmt = format_time(end_time)
        segment_descriptions.append(f"Chorus {i+1}: {start_fmt} - {end_fmt}")

    if not processed_segments:
        return np.array([]), "No chorus segments long enough for compilation"

    # Concatenate all the processed segments
    compilation = np.concatenate(processed_segments)

    # Join the descriptions
    description = "\n".join(segment_descriptions)

    return compilation, description


def save_audio_for_streamlit(audio_data: np.ndarray, sr: int, file_format: str = 'mp3') -> bytes:
    """Save audio data to a format suitable for Streamlit audio playback.

    Args:
        audio_data: Audio samples
        sr: Sample rate
        file_format: Output format ('mp3', 'wav', etc.)

    Returns:
        Audio bytes
    """
    with io.BytesIO() as buffer:
        sf.write(buffer, audio_data, sr, format=file_format)
        buffer.seek(0)
        return buffer.read()


def format_time(seconds: float) -> str:
    """Format seconds as MM:SS.

    Args:
        seconds: Time in seconds

    Returns:
        Formatted time string
    """
    minutes = int(seconds // 60)
    seconds = int(seconds % 60)
    return f"{minutes:02d}:{seconds:02d}"


def main() -> None:
    """Main function for the Streamlit app."""
    # Set page config
    st.set_page_config(
        page_title="Chorus Detection",
        page_icon="🎵",
        layout="wide",
        initial_sidebar_state="collapsed",
    )

    # Apply custom theme
    set_custom_theme()

    # App title and description
    st.title("Chorus Detection")
    st.markdown("""
    <div class="subheader">
        Upload a song or enter a YouTube URL to automatically detect chorus sections using AI
    </div>
    """, unsafe_allow_html=True)

    # User input section
    col1, col2 = st.columns(2)

    with col1:
        st.markdown('<div class="input-option">', unsafe_allow_html=True)
        st.subheader("Option 1: Upload an audio file")
        uploaded_file = st.file_uploader("Choose an audio file", type=['mp3', 'wav', 'ogg', 'flac', 'm4a'])
        st.markdown('</div>', unsafe_allow_html=True)

    with col2:
        st.markdown('<div class="input-option">', unsafe_allow_html=True)
        st.subheader("Option 2: YouTube URL")
        youtube_url = st.text_input("Enter a YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
        st.markdown('</div>', unsafe_allow_html=True)

    # Process button
    if st.button("Analyze"):
        # Check the input method
        audio_path = None
        file_name = None

        if uploaded_file is not None:
            audio_path, file_name = process_uploaded_file(uploaded_file)
        elif youtube_url:
            if is_youtube_url(youtube_url):
                audio_path, file_name = process_youtube(youtube_url)
            else:
                st.error("Invalid YouTube URL. Please enter a valid YouTube URL.")
        else:
            st.error("Please upload an audio file or enter a YouTube URL.")

        # If we have a valid audio path, process it
        if audio_path and file_name:
            try:
                # Load and process the audio file
                with st.spinner('Processing audio...'):
                    # Load audio and extract features
                    y, sr = librosa.load(audio_path, sr=22050)

                    # Create a temporary directory for model output
                    temp_output_dir = tempfile.mkdtemp()

                    # Load the model
                    model = load_CRNN_model(MODEL_PATH)

                    # Process audio and make predictions
                    audio_features, _ = process_audio(audio_path, output_path=temp_output_dir)
                    meter_grid_times, predictions = make_predictions(model, audio_features)

                    # Smooth predictions to avoid rapid transitions
                    smoothed_predictions = np.convolve(predictions,
                                                       np.ones(5)/5,
                                                       mode='same')

                    # Extract chorus segments
                    chorus_segments = extract_chorus_segments(y, sr, smoothed_predictions, meter_grid_times)

                    # Create a chorus compilation
                    compilation_audio, segments_desc = create_chorus_compilation(chorus_segments, sr)

                # Display results
                st.markdown(f"""
                <div class="result-container">
                    <div class="song-title">{file_name}</div>
                </div>
                """, unsafe_allow_html=True)

                # Display waveform with highlighted chorus sections
                fig, ax = plt.subplots(figsize=(14, 5))

                # Plot the waveform
                times = np.linspace(0, len(y)/sr, len(y))
                ax.plot(times, y, color='#b3b3b3', alpha=0.5, linewidth=1)
                ax.set_xlabel('Time (s)')
                ax.set_ylabel('Amplitude')
                ax.set_title('Audio Waveform with Chorus Sections Highlighted')

                # Highlight chorus sections
                for start_time, end_time, _ in chorus_segments:
                    ax.axvspan(start_time, end_time, alpha=0.3, color=THEME_COLORS['primary'])

                    # Add a label at the start of each chorus
                    ax.annotate('Chorus',
                                xy=(start_time, 0.8 * max(y)),
                                xytext=(start_time + 0.5, 0.9 * max(y)),
                                color=THEME_COLORS['primary'],
                                weight='bold')

                # Customize plot appearance
                ax.set_facecolor(THEME_COLORS['card_bg'])
                fig.patch.set_facecolor(THEME_COLORS['background'])
                ax.spines['top'].set_visible(False)
                ax.spines['right'].set_visible(False)
                ax.spines['bottom'].set_color(THEME_COLORS['border'])
                ax.spines['left'].set_color(THEME_COLORS['border'])
                ax.tick_params(axis='x', colors=THEME_COLORS['text'])
                ax.tick_params(axis='y', colors=THEME_COLORS['text'])
                ax.xaxis.label.set_color(THEME_COLORS['text'])
                ax.yaxis.label.set_color(THEME_COLORS['text'])
                ax.title.set_color(THEME_COLORS['text'])

                st.pyplot(fig)

                # Display chorus segments
                if chorus_segments:
                    st.markdown('<div class="chorus-card">', unsafe_allow_html=True)
                    st.subheader("Chorus Segments")
                    for i, (start_time, end_time, segment_audio) in enumerate(chorus_segments):
                        st.markdown(f"""
                        <div class="time-stamp">Chorus {i+1}: {format_time(start_time)} - {format_time(end_time)}</div>
                        """, unsafe_allow_html=True)

                        # Convert segment audio to bytes for playback
                        audio_bytes = save_audio_for_streamlit(segment_audio, sr)
                        st.audio(audio_bytes, format='audio/mp3')
                    st.markdown('</div>', unsafe_allow_html=True)

                    # Chorus compilation
                    if len(compilation_audio) > 0:
                        st.markdown('<div class="chorus-card">', unsafe_allow_html=True)
                        st.subheader("Chorus Compilation")
                        st.markdown("All chorus segments combined into one track:")

                        compilation_bytes = save_audio_for_streamlit(compilation_audio, sr)
                        st.audio(compilation_bytes, format='audio/mp3')
                        st.markdown('</div>', unsafe_allow_html=True)
                else:
                    st.info("No chorus sections detected in this audio.")

            except Exception as e:
                st.error(f"Error processing audio: {e}")
                logger.error(f"Error processing audio: {e}", exc_info=True)

if __name__ == "__main__":
    main()
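The segment grouping in extract_chorus_segments above is a plain run-length pass over a boolean mask. For clarity, an equivalent vectorized sketch (illustrative only, not part of this commit):

import numpy as np

# Group runs of True values by finding the indices where a padded mask flips.
mask = np.array([0, 0, 1, 1, 1, 0, 1, 1, 0], dtype=bool)
padded = np.concatenate(([False], mask, [False]))
flips = np.flatnonzero(padded[1:] != padded[:-1])
starts, ends = flips[::2], flips[1::2]  # half-open [start, end) runs of True
# starts -> [2, 6], ends -> [5, 8], matching what the explicit loop produces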
streamlit_app.py
CHANGED
@@ -1,35 +1,18 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-"""
-
-This module provides a web-based interface for the chorus detection system,
-allowing users to upload audio files or provide YouTube URLs for analysis.
+"""
+Streamlit web app for chorus detection in audio files.
 """
 
 import os
 import sys
 import logging
-
-# Configure logging
-logger = logging.getLogger("streamlit-app")
-
-# Configure TensorFlow logging before importing TensorFlow
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress TensorFlow logs
-
-# Import model downloader to ensure model is available
-try:
-    from download_model import ensure_model_exists
-except ImportError as e:
-    logger.error(f"Error importing ensure_model_exists: {e}")
-    raise
-
 import base64
 import tempfile
 import warnings
-from typing import Optional, Tuple, List
-import time
 import io
+from typing import Optional, Tuple, List
 
 import matplotlib.pyplot as plt
 import streamlit as st
@@ -39,33 +22,33 @@ import soundfile as sf
 import numpy as np
 from pydub import AudioSegment
 
-# Suppress warnings
-warnings.filterwarnings("ignore")  # Suppress all warnings
-tf.get_logger().setLevel('ERROR')  # Suppress TensorFlow ERROR logs
+# Configure logging
+logger = logging.getLogger("streamlit-app")
 
-# Debug import paths
-logger.info(f"Python path: {sys.path}")
-logger.info(f"Current working directory: {os.getcwd()}")
+# Suppress TensorFlow and other warnings
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+warnings.filterwarnings("ignore")
+tf.get_logger().setLevel('ERROR')
 
-# Import chorus detection modules
+# Import components
 try:
+    from download_model import ensure_model_exists
     from chorus_detection.audio.data_processing import process_audio
     from chorus_detection.audio.processor import extract_audio
     from chorus_detection.models.crnn import load_CRNN_model, make_predictions
     from chorus_detection.utils.cli import is_youtube_url
     from chorus_detection.utils.logging import logger
-
     logger.info("Successfully imported chorus_detection modules")
 except ImportError as e:
-    logger.error(f"Error importing chorus_detection modules: {e}")
+    logger.error(f"Error importing modules: {e}")
    raise
 
-# Define the MODEL_PATH directly
+# Define model path
 MODEL_PATH = os.path.join(os.getcwd(), "models", "CRNN", "best_model_V3.h5")
 if not os.path.exists(MODEL_PATH):
     MODEL_PATH = ensure_model_exists()
 
-# Define color scheme
+# UI theme colors
 THEME_COLORS = {
     'background': '#121212',
     'card_bg': '#181818',
@@ -79,15 +62,7 @@ THEME_COLORS = {
 
 
 def get_binary_file_downloader_html(bin_file: str, file_label: str = 'File') -> str:
-    """Generate HTML for file download link.
-
-    Args:
-        bin_file: Path to the binary file
-        file_label: Label for the download link
-
-    Returns:
-        HTML string for the download link
-    """
+    """Generate HTML for file download link."""
     with open(bin_file, 'rb') as f:
         data = f.read()
     b64 = base64.b64encode(data).decode()
@@ -134,14 +109,7 @@ def set_custom_theme() -> None:
 
 
 def process_youtube(url: str) -> Tuple[Optional[str], Optional[str]]:
-    """Process a YouTube URL and extract audio.
-
-    Args:
-        url: YouTube URL
-
-    Returns:
-        Tuple of (audio_path, video_name)
-    """
+    """Process a YouTube URL and extract audio."""
     try:
         with st.spinner('Downloading audio from YouTube...'):
             audio_path, video_name = extract_audio(url)
@@ -153,17 +121,9 @@ def process_youtube(url: str) -> Tuple[Optional[str], Optional[str]]:
 
 
 def process_uploaded_file(uploaded_file) -> Tuple[Optional[str], Optional[str]]:
-    """Process an uploaded audio file.
-
-    Args:
-        uploaded_file: Streamlit UploadedFile object
-
-    Returns:
-        Tuple of (audio_path, file_name)
-    """
+    """Process an uploaded audio file."""
     try:
         with st.spinner('Processing uploaded file...'):
-            # Save the uploaded file to a temporary location
             temp_dir = tempfile.mkdtemp()
             file_name = uploaded_file.name
             temp_path = os.path.join(temp_dir, file_name)
@@ -180,24 +140,9 @@ def process_uploaded_file(uploaded_file) -> Tuple[Optional[str], Optional[str]]:
 
 def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
                             meter_grid_times: np.ndarray) -> List[Tuple[float, float, np.ndarray]]:
-    """Extract chorus segments from predictions.
-
-    Args:
-        y: Audio data
-        sr: Sample rate
-        smoothed_predictions: Smoothed model predictions
-        meter_grid_times: Time grid for predictions
-
-    Returns:
-        List of (start_time, end_time, audio_segment) tuples
-    """
-    # Define threshold for chorus detection (probability > 0.5)
+    """Extract chorus segments from predictions."""
     threshold = 0.5
-
-    # Find the segments where the predictions are above the threshold
     chorus_mask = smoothed_predictions > threshold
-
-    # Group consecutive True values to identify segments
     segments = []
     current_segment = None
 
@@ -205,10 +150,8 @@ def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
         time = meter_grid_times[i]
 
         if is_chorus and current_segment is None:
-            # Start a new segment
            current_segment = (time, None, None)
         elif not is_chorus and current_segment is not None:
-            # End the current segment
             start_time = current_segment[0]
             current_segment = (start_time, time, None)
             segments.append(current_segment)
@@ -222,13 +165,9 @@ def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
     # Extract the actual audio for each segment
     segments_with_audio = []
     for start_time, end_time, _ in segments:
-        # Convert times to sample indices
         start_idx = int(start_time * sr)
         end_idx = int(end_time * sr)
-
-        # Extract the audio segment
         segment_audio = y[start_idx:end_idx]
-
         segments_with_audio.append((start_time, end_time, segment_audio))
 
     return segments_with_audio
@@ -236,49 +175,29 @@ def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
 
 def create_chorus_compilation(segments: List[Tuple[float, float, np.ndarray]],
                               sr: int, fade_duration: float = 0.3) -> Tuple[np.ndarray, str]:
-    """Create a compilation of chorus segments.
-
-    Args:
-        segments: List of (start_time, end_time, audio_data) tuples
-        sr: Sample rate
-        fade_duration: Duration of fade in/out in seconds
-
-    Returns:
-        Tuple of (compilation_audio, description)
-    """
+    """Create a compilation of chorus segments."""
     if not segments:
         return np.array([]), "No chorus segments found"
 
-    # Calculate the number of samples for fading
     fade_samples = int(fade_duration * sr)
-
-    # Prepare a list to store the processed segments
     processed_segments = []
-
-    # Description of segments
    segment_descriptions = []
 
-    # Process each segment
     for i, (start_time, end_time, audio) in enumerate(segments):
-        # Apply fade in and fade out
         segment_length = len(audio)
 
         if segment_length <= 2 * fade_samples:
-            # Segment is too short for fading, skip it
             continue
 
-        # Create a linear fade in and fade out
         fade_in = np.linspace(0, 1, fade_samples)
         fade_out = np.linspace(1, 0, fade_samples)
 
-        # Apply the fades
         audio_faded = audio.copy()
        audio_faded[:fade_samples] *= fade_in
         audio_faded[-fade_samples:] *= fade_out
 
         processed_segments.append(audio_faded)
 
-        # Format the times for the description
         start_fmt = format_time(start_time)
         end_fmt = format_time(end_time)
         segment_descriptions.append(f"Chorus {i+1}: {start_fmt} - {end_fmt}")
@@ -286,26 +205,14 @@ def create_chorus_compilation(segments: List[Tuple[float, float, np.ndarray]],
     if not processed_segments:
         return np.array([]), "No chorus segments long enough for compilation"
 
-    # Concatenate all the processed segments
     compilation = np.concatenate(processed_segments)
-
-    # Join the descriptions
     description = "\n".join(segment_descriptions)
 
     return compilation, description
 
 
 def save_audio_for_streamlit(audio_data: np.ndarray, sr: int, file_format: str = 'mp3') -> bytes:
-    """Save audio data to a format suitable for Streamlit audio playback.
-
-    Args:
-        audio_data: Audio samples
-        sr: Sample rate
-        file_format: Output format ('mp3', 'wav', etc.)
-
-    Returns:
-        Audio bytes
-    """
+    """Save audio data to a format suitable for Streamlit audio playback."""
     with io.BytesIO() as buffer:
         sf.write(buffer, audio_data, sr, format=file_format)
         buffer.seek(0)
@@ -313,14 +220,7 @@ def save_audio_for_streamlit(audio_data: np.ndarray, sr: int, file_format: str = 'mp3') -> bytes:
 
 
 def format_time(seconds: float) -> str:
-    """Format seconds as MM:SS.
-
-    Args:
-        seconds: Time in seconds
-
-    Returns:
-        Formatted time string
-    """
+    """Format seconds as MM:SS."""
     minutes = int(seconds // 60)
     seconds = int(seconds % 60)
     return f"{minutes:02d}:{seconds:02d}"
@@ -385,11 +285,7 @@ def main() -> None:
                 with st.spinner('Processing audio...'):
                     # Load audio and extract features
                     y, sr = librosa.load(audio_path, sr=22050)
-
-                    # Create a temporary directory for model output
                     temp_output_dir = tempfile.mkdtemp()
-
-                    # Load the model
                     model = load_CRNN_model(MODEL_PATH)
 
                     # Process audio and make predictions
@@ -397,14 +293,10 @@ def main() -> None:
                     meter_grid_times, predictions = make_predictions(model, audio_features)
 
                     # Smooth predictions to avoid rapid transitions
-                    smoothed_predictions = np.convolve(predictions,
-                                                       np.ones(5)/5,
-                                                       mode='same')
+                    smoothed_predictions = np.convolve(predictions, np.ones(5)/5, mode='same')
 
-                    # Extract chorus segments
+                    # Extract chorus segments and create compilation
                     chorus_segments = extract_chorus_segments(y, sr, smoothed_predictions, meter_grid_times)
-
-                    # Create a chorus compilation
                     compilation_audio, segments_desc = create_chorus_compilation(chorus_segments, sr)
 
                     # Display results
@@ -427,8 +319,6 @@ def main() -> None:
                 # Highlight chorus sections
                 for start_time, end_time, _ in chorus_segments:
                     ax.axvspan(start_time, end_time, alpha=0.3, color=THEME_COLORS['primary'])
-
-                    # Add a label at the start of each chorus
                     ax.annotate('Chorus',
                                 xy=(start_time, 0.8 * max(y)),
                                 xytext=(start_time + 0.5, 0.9 * max(y)),
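The smoothing call collapsed onto one line above is a five-point moving average. A quick sketch of its effect on toy predictions (illustrative only, not part of this commit):

import numpy as np

preds = np.array([0.1, 0.2, 0.9, 0.95, 0.9, 0.2, 0.1])
smoothed = np.convolve(preds, np.ones(5) / 5, mode='same')
# mode='same' keeps the output the same length as the input; the implicit
# zero padding at the boundaries pulls the first and last values toward 0,
# which damps spurious one-beat chorus flips near the edges of a song.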
|