130 lines
4.7 KiB
Python
130 lines
4.7 KiB
Python
|
from typing import overload
|
||
|
import librosa
|
||
|
from numba import jit
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
@jit(nopython=True)
|
||
|
def normalize_feature_sequence(X, norm='2', threshold=0.0001, v=None):
    """Normalize the columns of a feature sequence.

    Notebook: C3/C3S1_FeatureNormalization.ipynb

    Each column ``X[:, n]`` is divided by its norm ('1', '2', 'max') or
    standardized to zero mean and unit sample standard deviation ('z').
    Columns whose norm (or standard deviation) does not exceed ``threshold``
    are replaced by the fallback vector ``v``.

    Args:
        X (np.ndarray): Feature sequence of shape (K, N); columns are normalized
        norm (str): The norm to be applied. '1', '2', 'max' or 'z' (Default value = '2')
        threshold (float): A threshold; columns whose norm is not above it are replaced by ``v``
            (Default value = 0.0001)
        v (np.ndarray): Vector of length K used instead of normalization at or below ``threshold``.
            If None, uses the unit vector for the given norm (the zero vector for 'z')
            (Default value = None)

    Returns:
        X_norm (np.ndarray): Normalized feature sequence (float64, same shape as ``X``)

    Raises:
        AssertionError: If ``norm`` is not one of '1', '2', 'max', 'z'
    """
    assert norm in ['1', '2', 'max', 'z']

    K, N = X.shape
    X_norm = np.zeros((K, N))

    # NOTE: explicit per-column loops are kept on purpose; this function is
    # decorated with numba's ``jit(nopython=True)`` in this file.
    if norm == '1':
        if v is None:
            v = np.ones(K, dtype=np.float64) / K
        for n in range(N):
            s = np.sum(np.abs(X[:, n]))
            if s > threshold:
                X_norm[:, n] = X[:, n] / s
            else:
                X_norm[:, n] = v

    elif norm == '2':
        if v is None:
            v = np.ones(K, dtype=np.float64) / np.sqrt(K)
        for n in range(N):
            s = np.sqrt(np.sum(X[:, n]**2))
            if s > threshold:
                X_norm[:, n] = X[:, n] / s
            else:
                X_norm[:, n] = v

    elif norm == 'max':
        if v is None:
            v = np.ones(K, dtype=np.float64)
        for n in range(N):
            s = np.max(np.abs(X[:, n]))
            if s > threshold:
                X_norm[:, n] = X[:, n] / s
            else:
                X_norm[:, n] = v

    elif norm == 'z':
        if v is None:
            v = np.zeros(K, dtype=np.float64)
        for n in range(N):
            if K > 1:
                mu = np.sum(X[:, n]) / K
                sigma = np.sqrt(np.sum((X[:, n] - mu)**2) / (K - 1))
                if sigma > threshold:
                    X_norm[:, n] = (X[:, n] - mu) / sigma
                else:
                    X_norm[:, n] = v
            else:
                # The sample standard deviation divides by K - 1 and is
                # undefined for fewer than two rows; use the fallback vector
                # instead of dividing by zero.
                X_norm[:, n] = v

    return X_norm
|
||
|
|
||
|
|
||
|
def compute_chromagram_from_filename(fn_wav,
                                     Fs=22050,
                                     N=4096,
                                     H=2048,
                                     gamma=None,
                                     version='STFT',
                                     norm='2'):
    """Compute chromagram for WAV file specified by filename

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        fn_wav (str): Filename of WAV
        Fs (scalar): Sampling rate (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; only applied for the 'STFT' and 'IIR'
            versions (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
            (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal
        Fs (scalar): Sampling rate of audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If ``version`` is not one of 'STFT', 'CQT', 'IIR'
    """
    # Reject unknown versions before touching the file; otherwise X would
    # never be assigned and the function would fail late with an unbound name.
    if version not in ('STFT', 'CQT', 'IIR'):
        raise ValueError(
            "version must be 'STFT', 'CQT' or 'IIR', got {!r}".format(version))

    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X)**2)
        else:
            X = np.abs(X)**2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR'
        # Compute chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X,
                                       bins_per_octave=12,
                                       n_octaves=7,
                                       fmin=librosa.midi_to_hz(24),
                                       norm=None)

    if norm is not None:
        # Honor the requested norm (it was previously hard-coded to '2').
        X = normalize_feature_sequence(X, norm=norm)

    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
    import sys

    # Manual smoke test: compute a chromagram for one audio file.
    # The default path is machine-specific; pass a different file as the
    # first command-line argument to override it.
    test_file = "/home/martin/Music/deemix Music/Simone Sommerland - Ki-Ka-Kinderturnen.mp3"
    if len(sys.argv) > 1:
        test_file = sys.argv[1]
    # ``chroma`` holds the full return tuple (X, Fs_X, x, Fs, x_dur).
    chroma = compute_chromagram_from_filename(test_file)
|