#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This module contains normalization utilities
"""
import numpy as np
# Small positive offset added to absolute values before taking a logarithm,
# so that log(0) is never evaluated for zero-valued entries.
EPSILON = 0.0001
def range_normalize(
    array,
    min_value=None,
    max_value=None,
    log=False,
    log2=False,
    exp=False,
    exp2=False,
    hard_clip=True,
):
    """
    Linearly map a vector from the range [min_value, max_value] to [0, 1].

    Optional preprocessing is applied before the linear mapping: first a
    logarithm of the absolute values (offset by EPSILON), then an
    exponential. ``min_value`` and ``max_value`` always refer to the
    preprocessed values.

    Parameters
    ----------
    array : np.ndarray
        The values to normalize.
    min_value : number, optional
        Value mapped to 0. If None, the minimum of the preprocessed
        array is used.
    max_value : number, optional
        Value mapped to 1. If None, the maximum of the preprocessed
        array is used.
    log : bool
        Apply the natural logarithm. Takes precedence over ``log2``.
    log2 : bool
        Apply the base-2 logarithm (ignored if ``log`` is set).
    exp : bool
        Apply the natural exponential. Takes precedence over ``exp2``.
    exp2 : bool
        Apply the base-2 exponential (ignored if ``exp`` is set).
    hard_clip : bool
        If True, results outside [0, 1] are clipped to 0 or 1;
        otherwise they are extrapolated linearly.

    Returns
    -------
    np.ndarray
        The normalized values. If ``min_value == max_value`` no linear
        mapping is possible and the preprocessed values are clipped to
        [0, 1] regardless of ``hard_clip``. Empty input yields an empty
        float array.
    """
    # Nothing to scale; min()/max() of an empty array would raise.
    if np.size(array) == 0:
        return np.asarray(array, dtype=float)

    if log:
        array = np.log(np.abs(array) + EPSILON)
    elif log2:
        array = np.log2(np.abs(array) + EPSILON)

    if exp:
        array = np.exp(array)
    elif exp2:
        array = np.exp2(array)

    # Infer missing bounds only after preprocessing, so that they live in
    # the same domain as the values they are applied to.
    if min_value is None:
        min_value = array.min()
    if max_value is None:
        max_value = array.max()

    if min_value == max_value:
        # Degenerate range: avoid division by zero.
        array = np.clip(array, 0, 1)
    else:
        array = (array - min_value) / (max_value - min_value)

    if hard_clip:
        return np.clip(array, 0, 1)
    return array
def zero_one_normalize(
    array, min_value=-3.0, max_value=3.0, log=False, exp=False, clip=True
):
    """
    Standardize a vector to zero mean and unit variance.

    Optional preprocessing is applied first: the natural logarithm of the
    absolute values (offset by EPSILON) if ``log`` is set, then the natural
    exponential if ``exp`` is set.

    Parameters
    ----------
    array : np.ndarray
        The values to standardize.
    min_value : float
        Lower clipping bound for the standardized values.
    max_value : float
        Upper clipping bound for the standardized values.
    log : bool
        Apply ``log(abs(array) + EPSILON)`` before standardizing.
    exp : bool
        Apply ``exp(array)`` before standardizing (after ``log``).
    clip : bool
        If True, standardized values outside [min_value, max_value]
        are clipped to that range.

    Returns
    -------
    np.ndarray
        The standardized values. A constant vector has no spread and is
        mapped to all zeros instead of dividing by a zero standard
        deviation.
    """
    if log:
        array = np.log(np.abs(array) + EPSILON)
    if exp:
        array = np.exp(array)

    # Compare min and max rather than testing std == 0: rounding in the
    # mean can leave a tiny non-zero std for constant vectors.
    if np.size(array) > 0 and np.min(array) == np.max(array):
        array = np.zeros_like(array, dtype=float)
    else:
        array = (array - array.mean()) / array.std()

    if clip:
        return np.clip(array, min_value, max_value)
    return array
def minmaxrange_normalize(array):
    """
    Rescale a vector linearly from [array.min(), array.max()] to [0, 1].

    Delegates to range_normalize with both bounds left unset, so they are
    taken from the data itself. A constant vector cannot be rescaled and
    is clipped to [0, 1] instead.
    """
    rescaled = range_normalize(array, min_value=None, max_value=None)
    return rescaled
# Default per-feature normalization table used by normalize().
# Each key is a field name of the array being normalized; each value holds
# the normalization callable under "func" and the keyword arguments it is
# called with under "kwargs". Where "log2" is set, the min/max bounds are
# expressed in the base-2 log domain.
DEFAULT_NORM_FUNCS = {
    "pitch": {
        "func": range_normalize,
        "kwargs": {"min_value": 0, "max_value": 127},
    },
    "velocity": {
        "func": range_normalize,
        "kwargs": {"min_value": 0, "max_value": 127},
    },
    "onset_beat": {
        "func": minmaxrange_normalize,
        "kwargs": {},
    },
    "duration_beat": {
        "func": range_normalize,
        "kwargs": {"min_value": -3, "max_value": 3, "log2": True},
        # ref beat = 4th -> -3 = 32nd, 3 = breve
    },
    "beat_period": {
        "func": range_normalize,
        "kwargs": {"min_value": -3, "max_value": 2, "log2": True},
        # ref 1 second / beat -> -3 = 0.125 sec / beat , 2 = 4 sec / beat
    },
    "timing": {
        "func": range_normalize,
        "kwargs": {"min_value": -0.2, "max_value": 0.2},
        # deviation in seconds
    },
    "articulation_log": {
        "func": range_normalize,
        "kwargs": {"min_value": -4, "max_value": 3},
        # this is the ratio in base-2 log -> just min/max clip
    },
    # fill up with all note and performance features
}
def normalize(
    in_array,
    norm_funcs=DEFAULT_NORM_FUNCS,
    norm_func_fallback=minmaxrange_normalize,
    default_value=np.inf,
):
    """
    Normalize a note array.

    May include note features as well as performance features.
    All input columns must be of numeric types; every field is cast to
    ``float`` (double precision) in the returned copy. The input array
    is not modified.

    Parameters
    ----------
    in_array : np.ndarray
        The structured array to be normalized, one field per feature.
    norm_funcs : dict
        Maps a feature name to a dict with the normalization callable
        under ``"func"`` and its keyword arguments under ``"kwargs"``.
    norm_func_fallback : callable
        Normalization applied to features that have no entry in
        ``norm_funcs``. It is called with the values as only argument.
    default_value : number
        Placeholder marking entries that must be left untouched; only
        entries different from this value are normalized.

    Returns
    -------
    array : np.ndarray
        The normalized array. Within each feature, entries equal to
        ``default_value`` are kept, a feature whose remaining entries
        all share one value is set to 0.0, and a feature with no
        remaining entries is left unchanged.

    Raises
    ------
    TypeError
        If ``in_array`` is not a structured array with named fields.
    """
    names = in_array.dtype.names
    if names is None:
        raise TypeError(
            "normalize expects a structured array with named fields"
        )

    dtype_new = np.dtype({"names": names, "formats": [float] * len(names)})
    # astype returns a fresh copy, so the caller's array stays untouched.
    array = in_array.astype(dtype_new)

    for feature in names:
        # Field access yields a view, so writes below land in `array`.
        column = array[feature]
        # Only non-default entries are normalized.
        non_default_mask = column != default_value
        values = column[non_default_mask]

        n_unique = len(np.unique(values))
        if n_unique == 0:
            # Every entry is the default value: nothing to normalize, and
            # min/max based normalizers would fail on an empty input.
            continue
        if n_unique == 1:
            # Uniform feature carries no information: map it to zero.
            column[non_default_mask] = 0.0
            continue

        if feature not in norm_funcs:
            column[non_default_mask] = norm_func_fallback(values)
        else:
            spec = norm_funcs[feature]
            column[non_default_mask] = spec["func"](values, **spec["kwargs"])

    return array