# Source code for partitura.utils.normalize

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This module contains normalization utilities
"""
import numpy as np


# Small positive offset added to absolute values before taking a logarithm,
# so that zero-valued entries do not evaluate log(0).
EPSILON = 0.0001


def range_normalize(
    array,
    min_value=None,
    max_value=None,
    log=False,
    log2=False,
    exp=False,
    exp2=False,
    hard_clip=True,
):
    """
    Linearly map a vector from the range [min_value, max_value] to [0, 1].

    The vector can optionally be preprocessed with a logarithm and/or an
    exponential before the linear mapping. The bounds always refer to the
    preprocessed values.

    Parameters
    ----------
    array : np.ndarray
        The values to normalize.
    min_value : float, optional
        Value (after preprocessing) that is mapped to 0. If None, the
        minimum of the preprocessed array is used.
    max_value : float, optional
        Value (after preprocessing) that is mapped to 1. If None, the
        maximum of the preprocessed array is used.
    log : bool
        Replace the values by the natural logarithm of their absolute
        value (plus EPSILON). Takes precedence over `log2`.
    log2 : bool
        Same as `log`, but with the base-2 logarithm.
    exp : bool
        Apply the natural exponential (after any logarithm). Takes
        precedence over `exp2`.
    exp2 : bool
        Same as `exp`, but with base 2.
    hard_clip : bool
        If True, mapped values outside [0, 1] are clipped to 0 or 1,
        otherwise they are extrapolated linearly.

    Returns
    -------
    np.ndarray
        The normalized values. If `min_value` equals `max_value` no linear
        mapping is possible and the preprocessed values are clipped to
        [0, 1] instead, regardless of `hard_clip`.
    """
    if log:
        array = np.log(np.abs(array) + EPSILON)
    elif log2:
        array = np.log2(np.abs(array) + EPSILON)
    if exp:
        array = np.exp(array)
    elif exp2:
        array = np.exp2(array)

    # Missing bounds are derived *after* preprocessing so that they live in
    # the same domain as the values they are applied to (explicitly passed
    # bounds are expected in that domain as well).
    if min_value is None:
        min_value = array.min()
    if max_value is None:
        max_value = array.max()

    if min_value == max_value:
        # Degenerate range: avoid the division by zero.
        array = np.clip(array, 0, 1)
    else:
        array = (array - min_value) / (max_value - min_value)

    if hard_clip:
        return np.clip(array, 0, 1)
    return array


def zero_one_normalize(
    array, min_value=-3.0, max_value=3.0, log=False, exp=False, clip=True
):
    """
    Standardize a vector to zero mean and unit variance.

    The vector can optionally be preprocessed with a logarithm and/or an
    exponential before standardization.

    Parameters
    ----------
    array : np.ndarray
        The values to standardize.
    min_value : float
        Lower clipping bound applied to the standardized values.
    max_value : float
        Upper clipping bound applied to the standardized values.
    log : bool
        Replace the values by the natural logarithm of their absolute
        value (plus EPSILON) before standardizing.
    exp : bool
        Apply the natural exponential (after any logarithm) before
        standardizing.
    clip : bool
        If True, standardized values outside [min_value, max_value] are
        clipped to that range.

    Returns
    -------
    np.ndarray
        The standardized (and optionally clipped) values. A constant
        vector has no spread to scale by and is mapped to all zeros
        before clipping.
    """
    if log:
        array = np.log(np.abs(array) + EPSILON)
    if exp:
        array = np.exp(array)

    if array.size > 0 and array.min() == array.max():
        # Constant vector: the standard deviation is zero (or pure rounding
        # noise), so dividing by it would yield NaN or arbitrary +-1 values.
        # The centred values are exactly zero by definition.
        array = np.zeros_like(array, dtype=float)
    else:
        array = (array - array.mean()) / array.std()

    if clip:
        return np.clip(array, min_value, max_value)
    return array


def minmaxrange_normalize(array):
    """
    Rescale a vector linearly from [array.min(), array.max()] to [0, 1].

    Delegates to range_normalize with both bounds left unset, so the bounds
    are taken from the vector itself. A constant vector cannot be rescaled
    and is clipped to [0, 1] instead.
    """
    normalized = range_normalize(array, min_value=None, max_value=None)
    return normalized


# Default normalization per feature name: each entry gives the function to
# call ("func") and the keyword arguments to pass to it ("kwargs").
# When "log2" is set, min_value / max_value are given in the log2 domain.
DEFAULT_NORM_FUNCS = {
    "pitch": {
        "func": range_normalize,
        "kwargs": {"min_value": 0, "max_value": 127},
    },
    "velocity": {
        "func": range_normalize,
        "kwargs": {"min_value": 0, "max_value": 127},
    },
    "onset_beat": {
        "func": minmaxrange_normalize,
        "kwargs": {},
    },
    "duration_beat": {
        "func": range_normalize,
        "kwargs": {"min_value": -3, "max_value": 3, "log2": True},
        # log2 of the duration in beats, reference beat = quarter note:
        # -3 = 32nd note (1/8 beat), 3 = breve (8 beats)
    },
    "beat_period": {
        "func": range_normalize,
        "kwargs": {"min_value": -3, "max_value": 2, "log2": True},
        # log2 of the beat period, reference = 1 second / beat:
        # -3 = 0.125 sec / beat, 2 = 4 sec / beat
    },
    "timing": {
        "func": range_normalize,
        "kwargs": {"min_value": -0.2, "max_value": 0.2},
        # deviation in seconds
    },
    "articulation_log": {
        "func": range_normalize,
        "kwargs": {"min_value": -4, "max_value": 3},
        # this is already the ratio in base-2 log -> just min/max clip
    },
    # fill up with all note and performance features
}


def normalize(
    in_array,
    norm_funcs=DEFAULT_NORM_FUNCS,
    norm_func_fallback=minmaxrange_normalize,
    default_value=np.inf,
):
    """
    Normalize a note array.

    May include note features as well as performance features.
    All input columns must be of numeric types; every column is cast
    to `float` (double precision) in the returned copy.

    Parameters
    ----------
    in_array : np.ndarray
        The structured (note or performance) array to be normalized.
        It is not modified.
    norm_funcs : dict
        A dictionary of normalization functions for each feature. Each
        value is a dict with the keys "func" (the callable) and "kwargs"
        (keyword arguments passed to the callable).
    norm_func_fallback : callable
        Normalization function applied to features that have no entry
        in `norm_funcs`.
    default_value : float
        Marker for missing entries. Entries equal to this value are left
        untouched and are ignored when normalizing the remaining entries.

    Returns
    -------
    array : np.ndarray
        The normalized array. Features whose non-default entries all share
        a single value are set to 0 for those entries.
    """
    feature_names = in_array.dtype.names
    dtype_new = np.dtype(
        {
            "names": feature_names,
            "formats": [float] * len(feature_names),
        }
    )
    array = in_array.copy().astype(dtype_new)

    for feature in feature_names:
        # Field access on a structured array returns a view, so writing
        # into `column` updates `array` in place.
        column = array[feature]
        # use mask for non-default values and don't change default values
        non_default_mask = column != default_value
        values = column[non_default_mask]

        if values.size == 0:
            # Nothing to normalize (empty array or only default values);
            # the normalization functions cannot take min/max of nothing.
            continue

        if len(np.unique(values)) == 1:
            # uniform feature: carries no information, map it to zero
            column[non_default_mask] = 0.0
        elif feature in norm_funcs:
            spec = norm_funcs[feature]
            column[non_default_mask] = spec["func"](values, **spec["kwargs"])
        else:
            column[non_default_mask] = norm_func_fallback(values)

    return array