dennisvdang commited on
Commit
da764f1
·
0 Parent(s):

Initial commit for Hugging Face Space

Browse files
Files changed (6) hide show
  1. .space/app-entrypoint.sh +19 -0
  2. .space/config.json +11 -0
  3. README.md +27 -0
  4. app.py +653 -0
  5. download_model.py +128 -0
  6. requirements.txt +28 -0
.space/app-entrypoint.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Entrypoint for the Hugging Face Space: ensure the model exists, then launch
# the Streamlit app.
#
# Fail fast: without `set -e` the Streamlit app would start even when the
# model download script failed, producing confusing runtime errors later.
set -euo pipefail

# Report whether we're running inside a Hugging Face Space.
# ${SPACE_ID:-} guards against the variable being unset under `set -u`.
if [ -n "${SPACE_ID:-}" ]; then
    echo "Running on Hugging Face Space: $SPACE_ID"
else
    echo "Running locally"
fi

# Create necessary directories
mkdir -p models/CRNN

# Run model download script to ensure model is available
echo "Checking for model files..."
python src/download_model.py

# Start the Streamlit app
echo "Starting Streamlit app..."
streamlit run src/app.py --server.address=0.0.0.0 --server.port=7860 --server.enableCORS=false --server.enableXsrfProtection=false
.space/config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "app_file": "src/app.py",
3
+ "docker_build_args": {
4
+ "MODEL_HF_REPO": "dennisvdang/chorus-detection"
5
+ },
6
+ "sdk": "streamlit",
7
+ "python_requirements": "requirements.txt",
8
+ "suggested_hardware": "t4-small",
9
+ "suggested_cuda": "11.8",
10
+ "app_entrypoint": ".space/app-entrypoint.sh"
11
+ }
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Chorus Detection
3
+ emoji: 🎵
4
+ colorFrom: purple
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: "1.26.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # Chorus Detection App
13
+
14
+ This Streamlit app uses a Convolutional Recurrent Neural Network (CRNN) to automatically detect chorus sections in music tracks.
15
+
16
+ ## Features
17
+
18
+ - Detect and extract chorus sections in songs
19
+ - Upload audio files or provide YouTube URLs for analysis
20
+ - Display waveform visualization with highlighted chorus sections
21
+ - Create playable snippets of detected choruses
22
+
23
+ ## About the Model
24
+
25
+ The model was trained on a dataset of 332 manually labeled songs from various genres using a CRNN architecture. It achieved an F1 score of 0.864 (Precision: 0.831, Recall: 0.900) on an unseen test set.
26
+
27
+ For more information, visit the [GitHub repository](https://github.com/dennisvdang/chorus-detection).
app.py ADDED
@@ -0,0 +1,653 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """Streamlit web app for chorus detection in audio files.
5
+
6
+ This module provides a web-based interface for the chorus detection system,
7
+ allowing users to upload audio files or provide YouTube URLs for analysis.
8
+ """
9
+
10
+ import os
11
+ # Configure TensorFlow logging before importing TensorFlow
12
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow logs
13
+
14
+ # Import model downloader to ensure model is available
15
+ try:
16
+ from download_model import ensure_model_exists
17
+ except ImportError:
18
+ from src.download_model import ensure_model_exists
19
+
20
+ import base64
21
+ import tempfile
22
+ import warnings
23
+ from typing import Optional, Tuple, List
24
+ import time
25
+ import io
26
+
27
+ import matplotlib.pyplot as plt
28
+ import streamlit as st
29
+ import tensorflow as tf
30
+ import librosa
31
+ import soundfile as sf
32
+ import numpy as np
33
+ from pydub import AudioSegment
34
+
35
+ # Suppress warnings
36
+ warnings.filterwarnings("ignore") # Suppress all warnings
37
+ tf.get_logger().setLevel('ERROR') # Suppress TensorFlow ERROR logs
38
+
39
+ from chorus_detection.audio.data_processing import process_audio
40
+ from chorus_detection.audio.processor import extract_audio
41
+ from chorus_detection.config import MODEL_PATH
42
+ from chorus_detection.models.crnn import load_CRNN_model, make_predictions
43
+ from chorus_detection.utils.cli import is_youtube_url
44
+ from chorus_detection.utils.logging import logger
45
+
46
+ # Ensure the model is downloaded before proceeding
47
+ MODEL_PATH = ensure_model_exists()
48
+
49
+ # Define color scheme
50
+ THEME_COLORS = {
51
+ 'background': '#121212',
52
+ 'card_bg': '#181818',
53
+ 'primary': '#1DB954',
54
+ 'secondary': '#1ED760',
55
+ 'text': '#FFFFFF',
56
+ 'subtext': '#B3B3B3',
57
+ 'highlight': '#1DB954',
58
+ 'border': '#333333',
59
+ }
60
+
61
+
62
def get_binary_file_downloader_html(bin_file: str, file_label: str = 'File') -> str:
    """Build an HTML anchor that downloads *bin_file* as a base64 data URI.

    Args:
        bin_file: Path to the binary file to embed.
        file_label: Visible text of the download link.

    Returns:
        An ``<a>`` tag whose href embeds the whole file base64-encoded.
    """
    with open(bin_file, 'rb') as handle:
        payload = handle.read()
    encoded = base64.b64encode(payload).decode()
    filename = os.path.basename(bin_file)
    return (
        f'<a href="data:application/octet-stream;base64,{encoded}" '
        f'download="{filename}">{file_label}</a>'
    )
76
+
77
+
78
+ def set_custom_theme() -> None:
79
+ """Apply custom Spotify-inspired theme to Streamlit UI."""
80
+ custom_theme = f"""
81
+ <style>
82
+ .stApp {{
83
+ background-color: {THEME_COLORS['background']};
84
+ color: {THEME_COLORS['text']};
85
+ }}
86
+ .css-18e3th9 {{
87
+ padding-top: 2rem;
88
+ padding-bottom: 10rem;
89
+ padding-left: 5rem;
90
+ padding-right: 5rem;
91
+ }}
92
+ h1, h2, h3, h4, h5, h6 {{
93
+ color: {THEME_COLORS['text']} !important;
94
+ font-weight: 700 !important;
95
+ }}
96
+ .stSidebar .sidebar-content {{
97
+ background-color: {THEME_COLORS['card_bg']};
98
+ }}
99
+ .stButton>button {{
100
+ background-color: {THEME_COLORS['primary']};
101
+ color: white;
102
+ border-radius: 500px;
103
+ padding: 8px 32px;
104
+ font-weight: 600;
105
+ border: none;
106
+ transition: all 0.3s ease;
107
+ }}
108
+ .stButton>button:hover {{
109
+ background-color: {THEME_COLORS['secondary']};
110
+ transform: scale(1.04);
111
+ }}
112
+ .stTextInput>div>div>input,
113
+ .stFileUploader>div>div {{
114
+ background-color: {THEME_COLORS['card_bg']};
115
+ color: {THEME_COLORS['text']};
116
+ border: 1px solid {THEME_COLORS['border']};
117
+ border-radius: 4px;
118
+ }}
119
+ .stExpander {{
120
+ background-color: {THEME_COLORS['card_bg']};
121
+ border-radius: 8px;
122
+ margin-bottom: 10px;
123
+ border: 1px solid {THEME_COLORS['border']};
124
+ }}
125
+ .stExpander>div {{
126
+ border: none !important;
127
+ }}
128
+ .chorus-card {{
129
+ background-color: {THEME_COLORS['card_bg']};
130
+ border-radius: 8px;
131
+ padding: 20px;
132
+ margin-bottom: 15px;
133
+ border: 1px solid {THEME_COLORS['border']};
134
+ }}
135
+ .result-container {{
136
+ padding: 20px;
137
+ border-radius: 8px;
138
+ background-color: {THEME_COLORS['card_bg']};
139
+ margin-bottom: 20px;
140
+ border: 1px solid {THEME_COLORS['border']};
141
+ }}
142
+ .song-title {{
143
+ font-size: 24px;
144
+ font-weight: 700;
145
+ color: {THEME_COLORS['text']};
146
+ margin-bottom: 10px;
147
+ }}
148
+ .time-stamp {{
149
+ color: {THEME_COLORS['primary']};
150
+ font-weight: 600;
151
+ }}
152
+ audio {{
153
+ width: 100%;
154
+ border-radius: 500px;
155
+ margin-top: 10px;
156
+ }}
157
+ .stAlert {{
158
+ background-color: {THEME_COLORS['card_bg']};
159
+ color: {THEME_COLORS['text']};
160
+ border: 1px solid {THEME_COLORS['border']};
161
+ }}
162
+ .stRadio > div {{
163
+ gap: 1rem;
164
+ }}
165
+ .stRadio label {{
166
+ background-color: {THEME_COLORS['card_bg']};
167
+ padding: 10px 20px;
168
+ border-radius: 500px;
169
+ margin-right: 10px;
170
+ border: 1px solid {THEME_COLORS['border']};
171
+ }}
172
+ .stRadio label:hover {{
173
+ border-color: {THEME_COLORS['primary']};
174
+ }}
175
+ .stRadio [data-baseweb="radio"] {{
176
+ margin-right: 20px;
177
+ }}
178
+ .subheader {{
179
+ color: {THEME_COLORS['subtext']};
180
+ font-size: 14px;
181
+ margin-bottom: 20px;
182
+ }}
183
+ .input-option {{
184
+ background-color: {THEME_COLORS['card_bg']};
185
+ border-radius: 10px;
186
+ padding: 25px;
187
+ margin-bottom: 20px;
188
+ border: 1px solid {THEME_COLORS['border']};
189
+ }}
190
+ .or-divider {{
191
+ text-align: center;
192
+ font-size: 18px;
193
+ font-weight: 600;
194
+ color: {THEME_COLORS['text']};
195
+ margin: 20px 0;
196
+ position: relative;
197
+ }}
198
+ .or-divider:before, .or-divider:after {{
199
+ content: "";
200
+ display: inline-block;
201
+ width: 40%;
202
+ margin: 0 10px;
203
+ vertical-align: middle;
204
+ border-top: 1px solid {THEME_COLORS['border']};
205
+ }}
206
+ </style>
207
+ """
208
+ st.markdown(custom_theme, unsafe_allow_html=True)
209
+
210
+
211
def process_youtube(url: str) -> Tuple[Optional[str], Optional[str]]:
    """Download the audio track for a YouTube URL, reporting progress in the UI.

    Args:
        url: YouTube URL to process

    Returns:
        Tuple of (path to the extracted audio file, video title), or
        (None, None) when extraction fails.
    """
    progress = st.progress(0)
    status = st.empty()

    try:
        status.text("Getting video information...")
        progress.progress(10)

        status.text("Downloading audio from YouTube...")
        progress.progress(30)

        # Delegate the actual download to the yt-dlp-backed helper.
        audio_path, video_name = extract_audio(url)

        if not audio_path:
            status.text("Download failed.")
            progress.progress(100)
            st.error("Failed to extract audio from the provided URL.")
            st.info("Try downloading the video manually and uploading it instead.")
            return None, None

        progress.progress(90)
        status.text(f"Successfully downloaded '{video_name}'")
        progress.progress(100)
        return audio_path, video_name

    except Exception as e:
        import traceback
        progress.progress(100)
        status.text("Download failed with an error.")
        st.error(f"Failed to extract audio: {str(e)}")
        st.code(traceback.format_exc())
        return None, None
253
+
254
+
255
def process_uploaded_file(uploaded_file) -> Tuple[Optional[str], Optional[str]]:
    """Persist an uploaded audio file to a temporary file on disk.

    Args:
        uploaded_file: File uploaded through Streamlit; must expose
            ``.getvalue()`` and ``.name``.

    Returns:
        Tuple containing the path to the saved temp file and the original
        file name, or (None, None) on failure.
    """
    try:
        # Preserve the upload's real extension (the uploader accepts mp3, wav,
        # ogg, flac and m4a) so downstream decoders pick the right container;
        # previously the suffix was hard-coded to '.mp3' for every upload.
        suffix = os.path.splitext(uploaded_file.name)[1] or '.mp3'
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded_file.getvalue())
            audio_path = tmp.name
        return audio_path, uploaded_file.name
    except Exception as e:
        st.error(f"Error processing uploaded file: {e}")
        return None, None
272
+
273
+
274
def extract_chorus_segments(y: np.ndarray, sr: int, smoothed_predictions: np.ndarray,
                            meter_grid_times: np.ndarray) -> List[Tuple[float, float, np.ndarray]]:
    """Slice the audio into chorus segments, each padded with a 1-second lead-in.

    Args:
        y: Audio samples
        sr: Sample rate
        smoothed_predictions: Binary (0/1) per-meter-position chorus labels
        meter_grid_times: Time (s) of each meter grid position

    Returns:
        List of tuples (start_time, end_time, audio_segment)
    """
    # Locate contiguous runs of 1-labels as (start_time, end_time) pairs.
    runs = []
    run_start = None
    for idx, label in enumerate(smoothed_predictions):
        starts_run = label == 1 and (idx == 0 or smoothed_predictions[idx - 1] == 0)
        if starts_run:
            run_start = idx
        elif label == 0 and run_start is not None:
            # The run ended just before this grid position.
            runs.append((meter_grid_times[run_start], meter_grid_times[idx]))
            run_start = None

    # A run that never switches off extends to the final grid time, or to the
    # end of the audio when the grid has no entry after the run start.
    if run_start is not None:
        if len(meter_grid_times) > run_start + 1:
            tail_end = meter_grid_times[-1]
        else:
            tail_end = len(y) / sr
        runs.append((meter_grid_times[run_start], tail_end))

    # Cut each run out of the waveform, starting 1 second early when possible.
    segments = []
    for seg_start, seg_end in runs:
        lead_in_start = max(0, seg_start - 1.0)
        first_sample = int(lead_in_start * sr)
        last_sample = min(len(y), int(seg_end * sr))
        segments.append((lead_in_start, seg_end, y[first_sample:last_sample]))

    return segments
320
+
321
+
322
def create_chorus_compilation(segments: List[Tuple[float, float, np.ndarray]],
                              sr: int, fade_duration: float = 0.3) -> Tuple[np.ndarray, str]:
    """Create a compilation of all chorus segments with fading between segments.

    Args:
        segments: List of tuples (start_time, end_time, audio_segment)
        sr: Sample rate
        fade_duration: Duration of fade in/out in seconds (0 disables fading)

    Returns:
        Tuple containing the compiled audio array and a string with timing info
    """
    if not segments:
        return np.array([]), ""

    # Create a compilation of all segments
    compilation = np.array([])
    timing_info = ""
    current_position = 0

    # Fix: fade_duration was previously accepted (and documented) but never
    # applied; apply a linear fade-in/out so segment boundaries don't click.
    fade_samples = int(fade_duration * sr)

    for i, (start_time, end_time, segment) in enumerate(segments):
        # Add 0.5 seconds of silence between segments
        if i > 0:
            silence_samples = int(0.5 * sr)
            compilation = np.concatenate([compilation, np.zeros(silence_samples)])
            current_position += 0.5

        # Add segment info to timing
        minutes_start = int(current_position // 60)
        seconds_start = int(current_position % 60)

        # Apply the fade on a float copy so the caller's array is untouched.
        faded = np.asarray(segment, dtype=float).copy()
        n_fade = min(fade_samples, len(faded) // 2)  # fades must not overlap
        if n_fade > 0:
            ramp = np.linspace(0.0, 1.0, n_fade)
            faded[:n_fade] *= ramp
            faded[-n_fade:] *= ramp[::-1]

        # Add the segment
        compilation = np.concatenate([compilation, faded])

        # Update current position
        segment_duration = len(segment) / sr
        current_position += segment_duration

        minutes_end = int(current_position // 60)
        seconds_end = int(current_position % 60)

        # Original times in the song
        orig_min_start = int(start_time // 60)
        orig_sec_start = int(start_time % 60)
        orig_min_end = int(end_time // 60)
        orig_sec_end = int(end_time % 60)

        # Add timing info
        timing_info += f"Chorus {i+1}: {minutes_start}:{seconds_start:02d} - {minutes_end}:{seconds_end:02d} "
        timing_info += f"(Original: {orig_min_start}:{orig_sec_start:02d} - {orig_min_end}:{orig_sec_end:02d})\n"

    return compilation, timing_info
374
+
375
+
376
def save_audio_for_streamlit(audio_data: np.ndarray, sr: int, file_format: str = 'mp3') -> io.BytesIO:
    """Save audio data to a BytesIO object for use with st.audio.

    Args:
        audio_data: Audio array
        sr: Sample rate
        file_format: Audio file format passed through to soundfile

    Returns:
        BytesIO object (rewound to position 0) containing the encoded audio.

    Note:
        The return annotation previously said ``bytes``, but the function has
        always returned an ``io.BytesIO`` (which ``st.audio`` accepts).
    """
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sr, format=file_format)
    buffer.seek(0)
    return buffer
391
+
392
+
393
def format_time(seconds: float) -> str:
    """Render a duration in seconds as an M:SS string.

    Args:
        seconds: Time in seconds

    Returns:
        Formatted time string, e.g. ``"2:05"``
    """
    whole = int(seconds)
    return f"{whole // 60}:{whole % 60:02d}"
405
+
406
+
407
def create_waveform_visualization(audio_features, smoothed_predictions, meter_grid_times):
    """Create waveform visualization with highlighted chorus sections.

    Renders the harmonic and percussive waveforms on one axis, overlays the
    meter grid, and shades each predicted-chorus span green.

    Args:
        audio_features: Audio features. NOTE(review): assumed to expose
            ``y``, ``y_harm``, ``y_perc`` and ``sr`` — confirm against
            chorus_detection.audio.data_processing.
        smoothed_predictions: Array of binary predictions, one per meter
            grid position
        meter_grid_times: Array of meter grid times in seconds

    Returns:
        Matplotlib figure with visualization
    """
    from chorus_detection.visualization.plotter import plot_meter_lines

    # Set Matplotlib style to be dark and minimal (matches the app theme)
    plt.style.use('dark_background')

    fig, ax = plt.subplots(figsize=(12, 4), dpi=120)

    # Display harmonic and percussive components on the same axis
    librosa.display.waveshow(audio_features.y_harm, sr=audio_features.sr,
                             alpha=0.8, ax=ax, color='#1DB954')  # Primary color
    librosa.display.waveshow(audio_features.y_perc, sr=audio_features.sr,
                             alpha=0.7, ax=ax, color='#B3B3B3')  # Light gray
    plot_meter_lines(ax, meter_grid_times)

    # Highlight chorus sections; each prediction covers the span from its
    # grid time to the next one (the last span runs to the end of the audio)
    for i, prediction in enumerate(smoothed_predictions):
        start_time = meter_grid_times[i]
        end_time = meter_grid_times[i + 1] if i < len(
            meter_grid_times) - 1 else len(audio_features.y) / audio_features.sr
        if prediction == 1:
            ax.axvspan(start_time, end_time, color='#1DB954', alpha=0.3,
                       label='Predicted Chorus' if i == 0 else None)

    # Set plot limits and labels
    ax.set_xlim([0, len(audio_features.y) / audio_features.sr])
    ax.set_ylabel('Amplitude', color='#FFFFFF')

    # Add legend (a proxy patch, since axvspan only labels when i == 0)
    chorus_patch = plt.Rectangle((0, 0), 1, 1, fc='#1DB954', alpha=0.3)
    handles, labels = ax.get_legend_handles_labels()
    handles.append(chorus_patch)
    labels.append('Chorus')
    ax.legend(handles=handles, labels=labels)

    # Set x-tick labels in minutes:seconds format
    duration = len(audio_features.y) / audio_features.sr
    xticks = [i for i in range(0, int(duration) + 10, 30)]  # Every 30 seconds
    xlabels = [f"{int(tick // 60)}:{int(tick % 60):02d}" for tick in xticks]
    ax.set_xticks(xticks)
    ax.set_xticklabels(xlabels, color='#FFFFFF')
    ax.tick_params(axis='y', colors='#FFFFFF')

    # Style the plot: hide top/right spines, dim the rest, dark background
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_color('#333333')
    ax.spines['left'].set_color('#333333')
    ax.set_facecolor('#121212')
    fig.patch.set_facecolor('#121212')

    plt.tight_layout()
    return fig
470
+
471
+
472
def analyze_audio(audio_path: str, video_name: str, model_path: str = str(MODEL_PATH)) -> None:
    """Analyze audio file and display predictions.

    Runs the full pipeline — feature extraction, CRNN inference, chorus
    segmentation — and renders the results (waveform plot, compilation
    player, per-chorus players) into the Streamlit page. All errors are
    caught and shown in the UI rather than raised.

    Args:
        audio_path: Path to the audio file
        video_name: Name of the video or audio file (display only)
        model_path: Path to the model file
    """
    try:
        # Process audio
        with st.spinner("Processing audio..."):
            processed_audio, audio_features = process_audio(audio_path)

        if processed_audio is None:
            st.error("Failed to process audio. Please try a different file.")
            return

        # Load model
        with st.spinner("Loading model..."):
            model = load_CRNN_model(model_path=model_path)

        # Make predictions
        with st.spinner("Generating predictions..."):
            smoothed_predictions = make_predictions(model, processed_audio, audio_features, None, None)

        # Get chorus start times
        # NOTE(review): assumes audio_features exposes meter_grid, sr and
        # hop_length — confirm against the data_processing module.
        meter_grid_times = librosa.frames_to_time(
            audio_features.meter_grid, sr=audio_features.sr, hop_length=audio_features.hop_length)
        # A "chorus start" is a 1-label whose predecessor is 0 (or position 0).
        chorus_start_times = [
            meter_grid_times[i] for i in range(len(smoothed_predictions))
            if smoothed_predictions[i] == 1 and (i == 0 or smoothed_predictions[i - 1] == 0)
        ]

        # Extract chorus segments
        chorus_segments = []
        chorus_audio = None

        if chorus_start_times:
            with st.spinner("Extracting chorus segments..."):
                chorus_segments = extract_chorus_segments(
                    audio_features.y, audio_features.sr, smoothed_predictions, meter_grid_times)

                compilation, _ = create_chorus_compilation(
                    chorus_segments, audio_features.sr)

                if len(compilation) > 0:
                    chorus_audio = save_audio_for_streamlit(compilation, audio_features.sr)

        # Create waveform visualization
        waveform_fig = create_waveform_visualization(audio_features, smoothed_predictions, meter_grid_times)

        # Display results in custom-style container
        st.markdown('<div class="result-container">', unsafe_allow_html=True)
        st.subheader("Results")
        st.markdown(f'<div class="song-title">{video_name}</div>', unsafe_allow_html=True)

        # Display waveform
        st.pyplot(waveform_fig)

        if chorus_start_times:
            # Create chorus compilation section
            st.markdown("### Chorus Compilation")
            st.markdown('<div class="subheader">All choruses with 1-second lead-in</div>', unsafe_allow_html=True)
            st.audio(chorus_audio, format="audio/mp3")

            # Display individual chorus segments
            st.markdown("### Chorus Segments")

            # Create columns for each chorus segment
            for i, (start_time, end_time, segment) in enumerate(chorus_segments):
                segment_audio = save_audio_for_streamlit(segment, audio_features.sr)

                st.markdown(f"""
                <div class="chorus-card">
                    <span style="font-weight: 700;">Chorus {i+1}:</span>
                    <span class="time-stamp">{format_time(start_time)} - {format_time(end_time)}</span>
                </div>
                """, unsafe_allow_html=True)

                st.audio(segment_audio, format="audio/mp3")
        else:
            st.warning("No choruses were identified in this song.")

        st.markdown('</div>', unsafe_allow_html=True)

    except Exception as e:
        # Surface the failure (with traceback) in the UI instead of crashing.
        st.error(f"An error occurred: {e}")
        import traceback
        st.error(traceback.format_exc())
561
+
562
+
563
def main() -> None:
    """Main function for the Streamlit app.

    Builds the page (theme, header, sidebar), offers the two input methods
    (file upload first, then YouTube URL), and dispatches the chosen input to
    analyze_audio, deleting the temporary audio file afterwards.
    """
    st.set_page_config(
        page_title="Automated Chorus Detection",
        page_icon="🎵",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    # Apply custom theme
    set_custom_theme()

    # Header (the first column is only a spacer)
    col1, col2 = st.columns([1, 5])
    with col2:
        st.title("Automated Chorus Detection")
        st.markdown('<div class="subheader">Analyze songs and identify chorus sections using AI</div>', unsafe_allow_html=True)

    # Sidebar
    st.sidebar.markdown("## About")
    st.sidebar.markdown("""
    This app uses a deep learning model trained on over 300 annotated songs
    to identify chorus sections in music.

    **Features:**
    - Detects chorus sections in songs
    - Creates playable audio snippets of choruses
    - Visualizes audio waveform with highlighted choruses

    For more information, visit the [GitHub repository](https://github.com/dennisvdang/chorus-detection).
    """)

    # Main content with vertically stacked input methods
    st.markdown("## Select Input Method")

    # File upload option (now first)
    st.markdown("### Upload Audio File")
    uploaded_file = st.file_uploader(
        "",
        type=["mp3", "wav", "ogg", "flac", "m4a"],
        help="Upload an audio file in MP3, WAV, OGG, FLAC, or M4A format",
        key="file_upload"
    )

    if uploaded_file is not None:
        st.audio(uploaded_file, format="audio/mp3")

    upload_process_button = st.button("Process Uploaded Audio")

    # OR divider
    st.markdown('<div class="or-divider">OR</div>', unsafe_allow_html=True)

    # YouTube URL input (now second)
    st.markdown("### YouTube URL")
    url = st.text_input(
        "",
        placeholder="Paste a YouTube video URL here...",
        help="Enter the URL of a YouTube video to analyze",
        key="youtube_url"
    )

    youtube_process_button = st.button("Process YouTube Video")

    # Process uploaded file if selected
    if uploaded_file is not None and upload_process_button:
        audio_path, file_name = process_uploaded_file(uploaded_file)
        if audio_path:
            analyze_audio(audio_path, file_name)
            # Clean up the temporary file; a failed delete is non-fatal.
            # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
            try:
                os.remove(audio_path)
            except OSError:
                pass

    # Process YouTube URL if selected
    if youtube_process_button and url:
        if not is_youtube_url(url):
            st.error("Please enter a valid YouTube URL.")
        else:
            audio_path, video_name = process_youtube(url)
            if audio_path:
                analyze_audio(audio_path, video_name)
                # Clean up the temporary file; a failed delete is non-fatal.
                try:
                    os.remove(audio_path)
                except OSError:
                    pass
650
+
651
+
652
+ if __name__ == "__main__":
653
+ main()
download_model.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """Script to download the chorus detection model from HuggingFace.
5
+
6
+ This script checks if the model file exists locally, and if not, downloads it
7
+ from the specified HuggingFace repository.
8
+ """
9
+
10
+ import os
11
+ import sys
12
+ from pathlib import Path
13
+ import logging
14
+
15
+ # Use huggingface_hub for better integration with HF ecosystem
16
+ try:
17
+ from huggingface_hub import hf_hub_download
18
+ HF_HUB_AVAILABLE = True
19
+ except ImportError:
20
+ HF_HUB_AVAILABLE = False
21
+ import requests
22
+ from tqdm import tqdm
23
+
24
+ # Configure logging
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
28
+ )
29
+ logger = logging.getLogger("model-downloader")
30
+
31
def download_file_with_progress(url: str, destination: Path) -> None:
    """Stream *url* down to *destination*, showing a tqdm progress bar.

    Args:
        url: URL to download from
        destination: Path to save the file to
    """
    # Make sure the target directory exists before opening the file.
    destination.parent.mkdir(parents=True, exist_ok=True)

    response = requests.get(url, stream=True)
    response.raise_for_status()

    total_bytes = int(response.headers.get('content-length', 0))
    chunk_size = 1024  # stream in 1 KiB chunks

    logger.info(f"Downloading model from {url}")
    logger.info(f"File size: {total_bytes / (1024*1024):.1f} MB")

    progress = tqdm(
        desc=destination.name,
        total=total_bytes,
        unit='iB',
        unit_scale=True,
        unit_divisor=1024,
    )
    with open(destination, 'wb') as out, progress as bar:
        for chunk in response.iter_content(chunk_size):
            bar.update(out.write(chunk))
61
+
62
def ensure_model_exists(
    model_filename: str = "best_model_V3.h5",
    repo_id: str = "dennisvdang/chorus-detection",
    model_dir: Path = Path("models/CRNN"),
    hf_model_filename: str = "chorus_detection_crnn.h5"
) -> Path:
    """Return the local model path, downloading from HuggingFace if absent.

    Args:
        model_filename: Local filename for the model
        repo_id: HuggingFace repository ID
        model_dir: Directory to save the model to
        hf_model_filename: Filename of the model in the HuggingFace repo

    Returns:
        Path to the model file (the process exits on download failure)
    """
    model_path = model_dir / model_filename

    # Nothing to do when a previous run already fetched the model.
    if model_path.exists():
        logger.info(f"Model already exists at {model_path}")
        return model_path

    model_dir.mkdir(parents=True, exist_ok=True)
    logger.info(f"Model not found at {model_path}. Downloading...")

    try:
        if HF_HUB_AVAILABLE:
            # Preferred path: let huggingface_hub manage the transfer.
            logger.info(f"Downloading model from {repo_id}/{hf_model_filename} using huggingface_hub")
            fetched = hf_hub_download(
                repo_id=repo_id,
                filename=hf_model_filename,
                local_dir=model_dir,
                local_dir_use_symlinks=False
            )

            # The hub saves the file under its repo-side name; move it onto
            # the local name the app expects when the two differ.
            if os.path.basename(fetched) != model_filename:
                source = Path(fetched)
                model_path.parent.mkdir(parents=True, exist_ok=True)
                if model_path.exists():
                    model_path.unlink()
                source.rename(model_path)
                logger.info(f"Renamed {fetched} to {model_path}")
        else:
            # Fallback: plain HTTPS download with a progress bar.
            huggingface_url = f"https://huggingface.co/{repo_id}/resolve/main/{hf_model_filename}"
            download_file_with_progress(huggingface_url, model_path)

        logger.info(f"Successfully downloaded model to {model_path}")
        return model_path
    except Exception as e:
        logger.error(f"Failed to download model: {e}")
        sys.exit(1)
120
+
121
if __name__ == "__main__":
    # Repository and model filename are overridable via environment variables
    # so the Space config (docker_build_args) can point at a different model.
    ensure_model_exists(
        repo_id=os.environ.get("MODEL_HF_REPO", "dennisvdang/chorus-detection"),
        hf_model_filename=os.environ.get("HF_MODEL_FILENAME", "chorus_detection_crnn.h5"),
    )
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ numpy>=1.24.4
3
+ scipy>=1.10.1
4
+ tqdm>=4.66.1
5
+
6
+ # Machine learning
7
+ tensorflow>=2.15.0
8
+ keras>=2.15.0
9
+ scikit-learn>=1.3.0
10
+
11
+ # Audio processing
12
+ librosa>=0.10.1
13
+ soundfile>=0.12.1
14
+ pydub>=0.25.1
15
+ ffmpeg-python>=0.2.0
16
+
17
+ # Video/data acquisition
18
+ yt-dlp>=2023.10.7
19
+ requests>=2.31.0
20
+
21
+ # Visualization
22
+ matplotlib>=3.7.2
23
+
24
+ # Web app
25
+ streamlit>=1.26.0
26
+
27
+ # For model downloading
28
+ huggingface_hub>=0.16.4