# Spaces: balthou / interactive-pipe-audio-xylo
# Runtime error
# File size: 6,713 Bytes

from interactive_pipe import interactive_pipeline, interactive, Control, Image
from synthetizer import NOTE_FREQUENCIES, get_note
from interactive_pipe.data_objects.audio import Audio
from pathlib import Path
from typing import Tuple
from time import sleep
import argparse
import cv2
import numpy as np
import logging


# Display names of the available songs; they are also the keys of SONG.
SONG_1 = "Au clair de la lune"
SONG_2 = "Ainsi font, font, font"
SONG_3 = "Dodo l'enfant do"
SONG_4 = "A la claire fontaine"

# Each song is a space-separated sequence of note names ("do", "re", "mi", ...).
# A "-" token is a silence: generate_song draws it as an empty tile and does
# not count it when locating the note to play next.
SONG = {
    SONG_1: "fa fa fa sol la - sol - fa la sol sol fa - fa fa fa sol la - sol - fa la sol sol fa - - sol sol sol sol re - re - sol fa mi re do - fa fa fa sol la - sol - fa la sol sol fa",
    SONG_2: "mi mi do - mi - sol - sol - la sol fa mi re mi do mi mi do - mi - sol - sol - la sol fa mi re do",
    SONG_3: "mi - do - mi mi do - re mi fa mi re sol mi do - mi - do - mi mi do - re mi fa mi re sol do",
    SONG_4: "fa - fa la la sol la sol - fa - fa la la sol la - la - la sol fa la do la do - do la fa la sol - fa - fa la la sol fa la fa la - la sol fa la sol fa"
}


def select_song(song: str = SONG_1, context={}):
    """Remember the chosen song in ``context`` and rewind when it changes.

    Args:
        song: name of the song to follow (a key of ``SONG``).
        context: shared state dict; ``"song"`` is always written, and
            ``"time_index"`` is reset to 0 when ``song`` differs from the
            previously stored one (including the very first call).
    """
    song_changed = context.get("song", None) != song
    if song_changed:
        # A different song always starts again from its first note.
        context["time_index"] = 0
    context["song"] = song


def select_note(note="C4", context={}):
    """Record the selected note under ``context["note"]``.

    Args:
        note: name of the note that was selected.
        context: shared state dict that receives the ``"note"`` entry.
    """
    context.update({"note": note})


def create_note(context={}):
    """Synthesize the audio signal of the currently selected note.

    Args:
        context: shared state dict; ``"note"`` is read, defaulting to "C4".

    Returns:
        Whatever ``get_note`` returns for that note name.
    """
    selected = context.get("note", "C4")
    return get_note(selected)


def play_note(audio_signal: np.ndarray, context={}):
    """Cache the note's audio as a WAV file and request its playback.

    The signal is written once per note name to ``__<note>.wav`` (relative
    path, 44100 Hz) and the existing file is reused on later calls. Playback
    is requested through callables read from ``context`` (``"__stop"``,
    ``"__set_audio"``, ``"__play"``) -- presumably provided by the GUI
    backend; verify against interactive_pipe. Any failure while using them is
    logged and otherwise ignored.

    Args:
        audio_signal: samples of the note, passed to ``Audio.save_audio``.
        context: shared state dict; ``"note"`` and ``"time_index"`` are read.
    """
    note = context.get("note", "C4")
    file_name = Path(f"__{note}.wav")
    if not file_name.exists():
        Audio.save_audio(audio_signal, str(file_name), 44100)
        # Wait for the file to show up, but give up after ~5 s (500 polls of
        # 10 ms) instead of spinning forever if the write never lands.
        polls_left = 500
        while polls_left > 0 and not file_name.exists():
            sleep(0.01)
            print("waiting for file")
            polls_left -= 1
    if not file_name.exists():
        # Explicit check rather than `assert`, which is removed under -O.
        logging.warning(
            "Audio file %s is missing, skipping playback of note %s",
            file_name, note)
        return
    try:
        if context["time_index"] == 0:
            # Nothing is played at time 0; playback is stopped instead.
            context["__stop"]()
        else:
            context["__set_audio"](file_name)
            context["__play"]()
    except Exception as e:
        # Best effort: backends without audio support end up here.
        logging.warning(
            "Error playing note %s: %s, not expected to work with MPL backend for instance",
            note, e)


def display_current_color(context={}):
    """Return the colour tile of the selected note, or black at time 0.

    Args:
        context: shared state dict; ``"time_index"`` must be present,
            ``"note"`` defaults to "C4".

    Returns:
        A 256x256x3 array: all zeros while ``time_index`` is 0, otherwise
        the tile built by ``get_color`` for the selected note.
    """
    if context["time_index"] != 0:
        return get_color(context.get("note", "C4"),  size=(256, 256))
    return np.zeros((256, 256, 3))


def display_next_color(context={}):
    """Return the colour tile of the note to play next, or black if none.

    Args:
        context: shared state dict; ``"target_note"`` is read.

    Returns:
        A 256x256x3 array: all zeros when no target note is set, otherwise
        the tile built by ``get_color`` for the target note.
    """
    upcoming = context.get("target_note", None)
    has_target = upcoming is not None
    return get_color(upcoming,  size=(256, 256)) if has_target else np.zeros((256, 256, 3))


# Note names used in the song strings; paired by position with the keys of
# NOTE_FREQUENCIES to build the lookup table below.
NOTES_TRANSLATION = ["do", "re", "mi", "fa", "sol", "la", "si", "do2"]
NOTES_CORRESPONDANCE = {
    NOTES_TRANSLATION[position]: note_name
    for position, note_name in enumerate(NOTE_FREQUENCIES.keys())
}


def get_color(note, size=(256, 256)):
    """Build a solid colour tile labelled with the note's name.

    The position of ``note`` among the keys of ``NOTE_FREQUENCIES`` selects
    both the colour (from the palette below) and the label (from
    ``NOTES_TRANSLATION``, upper-cased).

    Args:
        note: a key of ``NOTE_FREQUENCIES``.
        size: ``(width, height)`` of the tile in pixels.

    Returns:
        Float array of shape ``(height, width, 3)`` with values in [0, 1].

    Raises:
        ValueError: if ``note`` is not a key of ``NOTE_FREQUENCIES``.
        IndexError: if the note's position exceeds the palette or the
            translation list.
    """
    colors = {
        "red": (1.0, 0.0, 0.0),
        "orange": (1.0, 0.65, 0.0),
        "yellow": (0.9, 0.9, 0.0),
        "green": (0.0, 0.5, 0.0),
        "cyan": (0.0, 0.7, 1.0),
        "dark blue": (0.0, 0.0, 0.7),
        "purple": (0.5, 0.0, 0.5),
        "pink": (1.0, 0.75, 0.8),
    }
    index = list(NOTE_FREQUENCIES.keys()).index(note)
    # The colour names only document the palette; the pick is positional.
    color = list(colors.values())[index]
    img = np.ones((size[1], size[0], 3)) * np.array(color)[None, None, :]
    text = NOTES_TRANSLATION[index].upper()
    font_scale = size[0] // 64
    thickness = 2
    text_size = cv2.getTextSize(
        text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)[0]
    # Centre the label horizontally and vertically in the tile.
    text_x = (size[0] - text_size[0]) // 2
    text_y = (size[1] + text_size[1]) // 2
    cv2.putText(
        img,
        text,
        (text_x, text_y),
        # White on the same [0, 1] float scale as the rest of the tile;
        # 255 would leave out-of-range values in the image.
        cv2.FONT_HERSHEY_SIMPLEX, font_scale, (1.0, 1.0, 1.0), thickness
    )
    return img


def add_border(img, border_size=4, color=(0.2, 0.2, 0.2)):
    """Paint a solid frame along the four edges of ``img``, in place.

    ``border_size`` used to be overwritten with 4 inside the body, so the
    argument was ignored. It is now honoured, and the default is 4 so calls
    that rely on the default draw the same frame as before.

    Args:
        img: array indexed as ``[row, column, ...]``; modified in place.
        border_size: frame thickness in pixels; values <= 0 draw nothing.
        color: value assigned to every frame pixel.

    Returns:
        None.
    """
    if border_size <= 0:
        # `img[-0:]` selects the whole array, so a zero-width frame must be
        # skipped explicitly rather than repainting the entire image.
        return
    img[:border_size, :] = color
    img[-border_size:, :] = color
    img[:, :border_size] = color
    img[:, -border_size:] = color


def increment_time(context: dict = {}) -> None:
    """Advance ``context["time_index"]`` by one, starting it at 0 if unset.

    Args:
        context: shared state dict; a missing or ``None`` ``"time_index"``
            is initialised to 0, any other value is incremented.
    """
    if context.get("time_index") is None:
        context["time_index"] = 0
        return
    context["time_index"] += 1


# Pipeline description wrapped by interactive_pipeline in the __main__ block.
# Every step is called here without its `context` argument.
# NOTE(review): presumably interactive_pipe supplies the shared context dict
# and the widget values (song, note) when it runs the steps -- confirm
# against the library documentation before restructuring this function.
def xylo_player():
    # Store the selected song / note in the context.
    select_song()
    select_note()
    # Song sheet image; also stores the note to play next as "target_note".
    full_song = song_player()
    # Synthesize the selected note, then cache it as a WAV file and play it.
    audio = create_note()
    play_note(audio)
    # current_note = display_current_color()
    # Colour tile of the note the player is expected to hit next.
    target_note = display_next_color()
    # Advance the position in the song once everything else has run.
    increment_time()
    return [full_song, target_note]


def song_player(context={}):
    """Render the current song and publish the note to play next.

    Args:
        context: shared state dict; ``"song"`` must be present,
            ``"time_index"`` defaults to 0, and ``"target_note"`` is written
            with the second value returned by ``generate_song``.

    Returns:
        The song image returned by ``generate_song``.
    """
    # Unknown song names fall back to an empty note string.
    song_str = SONG.get(context["song"], "")
    position = context.get("time_index", 0)
    image_song, upcoming = generate_song(song_str, current_time=position)
    context["target_note"] = upcoming
    return image_song


def generate_song(song_str, current_time=None) -> Tuple[np.ndarray, str]:
    """Render a song as a grid of note tiles and locate the note to play.

    Args:
        song_str: space-separated note names (keys of
            ``NOTES_CORRESPONDANCE``); "-" and "." are silences.
        current_time: index, counted over non-silence tokens only, of the
            note to highlight with a red border.

    Returns:
        ``(image, target_note)``. ``image`` stacks 64x128 tiles, 12 per row,
        with the last row padded with black tiles. ``target_note`` is the
        ``NOTE_FREQUENCIES`` key of the highlighted note, or ``None`` when no
        token matches ``current_time``.
    """
    size = (64, 128)
    max_notes_per_line = 12
    idle_border = (0.2, 0.2, 0.2)
    all_notes = []
    index_no_silence = -1
    target_note = None
    for note in song_str.split(" "):
        if note in ("-", "."):
            img_note = np.zeros((size[1], size[0], 3))
            color = idle_border
        else:
            # Unknown names are skipped in the image but still consume an
            # index, exactly as before.
            index_no_silence += 1
            note_classic = NOTES_CORRESPONDANCE.get(note, None)
            if note_classic is None:
                print(f"Note {note} not found")
                continue

            img_note = get_color(note_classic, size=size)
            if current_time == index_no_silence:
                target_note = note_classic
                color = (0.8, 0., 0.)
            else:
                color = idle_border
        add_border(img_note, color=color)
        all_notes.append(img_note)

    # Pad only up to the next full row: `-len % n` is 0 when the last row is
    # already complete, so no spurious blank row is appended.
    remainder = -len(all_notes) % max_notes_per_line
    if not all_notes:
        # Empty or entirely unknown song: still return one blank row instead
        # of failing on a missing first tile.
        remainder = max_notes_per_line
    blank_tile = np.zeros((size[1], size[0], 3))
    all_notes.extend([blank_tile] * remainder)

    note_lines = [all_notes[i:i + max_notes_per_line]
                  for i in range(0, len(all_notes), max_notes_per_line)]
    out_image = np.vstack([np.hstack(line) for line in note_lines])
    return out_image, target_note


if __name__ == '__main__':
    # Command line: only the GUI backend is selectable.
    parser = argparse.ArgumentParser(description='Xylophone synthesizer')
    parser.add_argument(
        '-b', '--backend',
        type=str,
        default='gradio',
        choices=['gradio', 'qt', 'mpl'],
    )
    args = parser.parse_args()

    # One icon per note, written next to the script as "__<note>.jpg".
    all_notes = list(NOTE_FREQUENCIES.keys())
    icon_list = []
    for note in all_notes:
        icon = Path(f"__{note}.jpg")
        Image.save_image(get_color(note, size=(512, 512)), icon)
        icon_list.append(icon)

    # Attach the controls to the two selector functions, then build and
    # launch the pipeline with the requested backend.
    note_control = Control("C4", all_notes, icons=icon_list)
    interactive(note=note_control)(select_note)
    interactive(song=(SONG_1, list(SONG.keys())))(select_song)
    launch = interactive_pipeline(
        gui=args.backend,
        cache=False,
        audio=True
    )(xylo_player)
    launch()