# data pre-process

The drumkit sound dataset used for the training is available here:
https://s3-ap-northeast-1.amazonaws.com/codepen-dev/drumkit_dataset.zip

In [1]:
import librosa
import librosa.display
import numpy as np
from glob import glob
%matplotlib inline

N_FFT = 1024
HOP_LENGTH = 256 
SR = 16000
MELSPEC_SIZE = 128;

len_src = 3. 
ref_n_src = int(SR * len_src)

drum_dirs = [r.split('/')[-1] for r in sorted(glob('./selected_drums/*'))]
NB_CLASS = len(drum_dirs)

print drum_dirs

def get_melspec(filepath, hop_length=HOP_LENGTH, n_mels=128):

    y_tmp = np.zeros(ref_n_src)
    
    y, sr = librosa.core.load(filepath, sr = SR, mono=True)
    y = y[:ref_n_src]
    y_tmp[:len(y)] = y[:ref_n_src]
        
    # sfft -> mel conversion
    melspec = librosa.feature.melspectrogram(y=y_tmp, sr=sr,
                n_fft=N_FFT, hop_length=hop_length, n_mels=n_mels)
    S = librosa.power_to_db(melspec, np.max) 
        
    return S

['0_kick', '1_snare', '2_hihat_closed', '3_hihat_open', '4_tom_low', '5_tom_mid', '6_tom_high', '7_clap', '8_rim']


In [3]:
from glob import glob
from tqdm import tqdm
from random import shuffle


filepaths = glob("./selected_drums/*/*")
# filepaths2 = glob("./augmented/*/*")
# filepaths.extend(filepaths2)
print len(filepaths)


shuffle(filepaths)

drum_genres = []
drum_melspecs=[]
NB_CLASS = len(drum_dirs)


for filepath in tqdm(filepaths):
    dir_ = filepath.split("/")[-2]
    genre = drum_dirs.index(dir_)
    
    try:
        melspec = get_melspec(filepath, HOP_LENGTH, MELSPEC_SIZE)

        if melspec.shape[1] > MELSPEC_SIZE:
            melspec = melspec[:,:MELSPEC_SIZE]
        else:
            melspec.resize((MELSPEC_SIZE,MELSPEC_SIZE)) 

        drum_genres.append(genre)
        drum_melspecs.append(melspec)
    except:
        print ("error", filepath)

  0%|          | 0/27069 [00:00<?, ?it/s]

27069


100%|██████████| 27069/27069 [3:13:11<00:00,  2.34it/s]  


In [4]:
drum_genres = np.array(drum_genres)
print(drum_genres.shape)

from keras.utils import to_categorical

drum_genres = to_categorical(drum_genres, NB_CLASS)
print(drum_genres.shape)

drum_melspecs = np.array(drum_melspecs)
drum_melspecs = np.expand_dims(drum_melspecs, 3)
print(drum_melspecs.shape)


(27069,)


Using TensorFlow backend.


(27069, 9)
(27069, 128, 128, 1)


In [5]:
np.savez("drum_data_128.npz", melspecs=drum_melspecs, genres=drum_genres)

In [3]:
## loading from pre-processed npz file
# drum_melspecs = np.load("drum_data_128.npz")['melspecs']
# drum_genres = np.load("drum_data_128.npz")['genres']

# training

In [4]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten
from keras.layers import BatchNormalization,Activation
from keras.layers.advanced_activations import ELU

from keras.models import Model
from keras import backend as K

SIZE = MELSPEC_SIZE

input_img = Input(shape=(SIZE, SIZE, 1)) # normalized, 128 x 128

x = Conv2D(32, (3, 3), padding='same', kernel_initializer='he_normal')(input_img) #nb_filter, nb_row, nb_col
x = BatchNormalization(axis=1)(x)
x = ELU(alpha=1.0)(x)
x = MaxPooling2D((4, 4))(x)

x = Conv2D(64, (3, 3), padding='same',kernel_initializer='he_normal')(x)
x = BatchNormalization(axis=1)(x)
x = ELU(alpha=1.0)(x)
x = MaxPooling2D((2, 2))(x)


# x = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal')(x)
# x = BatchNormalization(axis=1)(x)
# x = ELU(alpha=1.0)(x)
# x = MaxPooling2D((2, 2), padding='same')(x)
# print K.int_shape(x)

x = Conv2D(32, (3, 3), padding='same', kernel_initializer='he_normal')(x)
x = BatchNormalization(axis=1)(x)
x = ELU(alpha=1.0)(x)
x = MaxPooling2D((2, 4))(x)

x = Conv2D(32, (3, 3), padding='same', kernel_initializer='he_normal')(x)
x = BatchNormalization(axis=1)(x)
x = ELU(alpha=1.0)(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(NB_CLASS)(x)
y = Activation("softmax")(x)

model = Model(input_img, y)

Using TensorFlow backend.


In [5]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 1)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 128, 128, 32)      320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 128, 128, 32)      512       
_________________________________________________________________
elu_1 (ELU)                  (None, 128, 128, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 32, 32, 64)        128       
__________

In [6]:
class_weight = {}
total = drum_genres.shape[0]
for i in range(NB_CLASS):
    nb = np.sum(np.argmax(drum_genres, axis=1) == i)
    class_weight[i] = total / float(nb) 
print class_weight
    

{0: 3.808244231851435, 1: 3.4399542508577965, 2: 13.159455517744288, 3: 8.788636363636364, 4: 13.089458413926499, 5: 17.703727926749508, 6: 13.747587607922803, 7: 21.18075117370892, 8: 243.86486486486487}


In [7]:
nb_total = drum_melspecs.shape[0]
nb_train = int(nb_total * 0.9)
print nb_total, nb_train

train_melspecs = drum_melspecs[:nb_train]
train_genres = drum_genres[:nb_train]

val_melspecs = drum_melspecs[nb_train:]
val_genres = drum_genres[nb_train:]

27069 24362


In [8]:
train_melspecs.shape
train_genres.shape


(24362, 9)

In [11]:
from keras.callbacks import EarlyStopping
es = EarlyStopping(verbose=1, patience=5)

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics = ['acc'])

model.fit(train_melspecs, train_genres, batch_size=64, 
          epochs=100, verbose=1, 
          shuffle=False,validation_data = (val_melspecs, val_genres), class_weight=class_weight, callbacks=[es])


Train on 24362 samples, validate on 2707 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 00031: early stopping


<keras.callbacks.History at 0x7fc058f91c50>

In [12]:
model.save("model/drum_spec_model_128.h5")

then you can conver keras model into tensorflow.js model with the following command 

```$ tensorflowjs_converter --input_format keras \
                       path/to/my_model.h5 \
                       path/to/tfjs_target_dir```