In [1]:
import json
import pathlib

import numpy as np
import librosa
import tqdm
import joblib
In [2]:
# Sample rate (Hz) that audio is resampled to when loaded for feature extraction.
sampling_rate = 8_000
In [12]:
def compute_melspec(filename, n_mels=64, n_frames=64):
    """Load an audio file and return a fixed-size, normalized log-mel spectrogram.

    The audio is loaded at the module-level ``sampling_rate``, turned into a mel
    power spectrogram, converted to dB relative to its own maximum, and mapped
    linearly from [-80, 0] dB onto [0, 1]. The time axis is then cropped or
    zero-padded on the right so the result always has ``n_frames`` columns.

    Args:
        filename: Path of the audio file, forwarded to ``librosa.load``.
        n_mels: Number of mel bands (rows of the spectrogram).
        n_frames: Number of time frames (columns) in the returned array.

    Returns:
        Array of shape ``(1, n_mels, n_frames)`` with values in [0, 1].
    """
    waveform, _ = librosa.load(filename, sr=sampling_rate)
    melspec = librosa.feature.melspectrogram(
        y=waveform, sr=sampling_rate, n_mels=n_mels, fmax=4000
    )
    melspec = librosa.power_to_db(melspec, ref=np.max)
    # Normalize the mel spectrogram: -80 dB -> 0.0, 0 dB -> 1.0
    melspec = (melspec.clip(min=-80, max=0) + 80) / 80
    # Clips longer than n_frames are cropped; previously they made the padding
    # width negative and raised a ValueError.
    melspec = melspec[:, :n_frames]
    missing = n_frames - melspec.shape[1]
    if missing > 0:
        # Zero is the normalized value of the -80 dB floor.
        melspec = np.pad(melspec, ((0, 0), (0, missing)), mode="constant")
    return melspec.reshape(1, n_mels, n_frames)
In [4]:
# Absolute path of the "data" directory, resolved against the current working directory.
data_dir = pathlib.Path("data").resolve()
# Directory under data_dir that receives the generated spectrogram files.
processed_dir = data_dir.joinpath("nsynth_processed_64x64")
In [5]:
# Create the output root; parents=True also creates missing ancestor directories
# instead of raising FileNotFoundError when they do not exist yet.
processed_dir.mkdir(parents=True, exist_ok=True)
In [6]:
def process(filename, out_dir):
    """Compute the mel spectrogram of one audio file and cache it as ``.npy``.

    The result is stored as ``out_dir / f"{filename.stem}.npy"``. If that file
    already exists the call returns immediately, so repeated runs only do the
    remaining work.

    Args:
        filename: ``pathlib.Path`` of the input audio file.
        out_dir: ``pathlib.Path`` of an existing directory to write into.

    Returns:
        None.
    """
    out_filename = out_dir / f"{filename.stem}.npy"
    if out_filename.is_file():
        return
    melspec = compute_melspec(filename)
    # Write to a temporary name and rename afterwards: an interrupted write must
    # not leave a truncated file under the final name, because the is_file()
    # check above would then skip it on every later run.
    tmp_filename = out_dir / f"{filename.stem}.npy.tmp"
    try:
        # Passing a file object stops np.save from appending ".npy" to the name.
        with open(tmp_filename, "wb") as f:
            np.save(f, melspec)
        tmp_filename.replace(out_filename)
    finally:
        # Only still present if saving or renaming failed.
        if tmp_filename.exists():
            tmp_filename.unlink()
In [8]:
# Load the example metadata of the training split.
# NOTE: the per-split loop further down re-reads every examples.json (train
# included) into `data`, so this cell is only useful for interactive inspection.
# The file is decoded as UTF-8 explicitly instead of with the locale default.
with open(data_dir / "nsynth" / "nsynth-train" / "examples.json", encoding="utf-8") as f:
    data = json.load(f)
In [13]:
# Convert every clip listed for each NSynth split into a cached .npy spectrogram.
for key in ("train", "valid", "test"):
    print(f"Working on {key} split...")
    split_dir = data_dir / "nsynth" / f"nsynth-{key}"
    audio_dir = split_dir / "audio"
    print("Loading filenames...")
    # Decode the metadata as UTF-8 explicitly instead of with the locale default.
    with open(split_dir / "examples.json", encoding="utf-8") as f:
        data = json.load(f)
    # The keys of examples.json are used as the .wav file stems. The clip id gets
    # its own name so it cannot be confused with the split name in `key`.
    filenames = [audio_dir / f"{note_id}.wav" for note_id in data]
    out_dir = processed_dir / key
    out_dir.mkdir(parents=True, exist_ok=True)
    # process() skips clips whose output already exists, so reruns are cheap.
    joblib.Parallel(n_jobs=20, verbose=1)(
        joblib.delayed(process)(filename, out_dir) for filename in filenames
    )
Working on train split...
Loading filenames...
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    4.9s
[Parallel(n_jobs=20)]: Done 260 tasks      | elapsed:    5.2s
[Parallel(n_jobs=20)]: Done 8385 tasks      | elapsed:  1.1min
[Parallel(n_jobs=20)]: Done 9370 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done 11090 tasks      | elapsed:  1.2min
[Parallel(n_jobs=20)]: Done 11640 tasks      | elapsed:  1.3min
[Parallel(n_jobs=20)]: Done 12860 tasks      | elapsed:  1.4min
[Parallel(n_jobs=20)]: Done 14360 tasks      | elapsed:  1.6min
[Parallel(n_jobs=20)]: Done 16060 tasks      | elapsed:  1.7min
[Parallel(n_jobs=20)]: Done 17960 tasks      | elapsed:  1.9min
[Parallel(n_jobs=20)]: Done 20060 tasks      | elapsed:  2.1min
[Parallel(n_jobs=20)]: Done 22360 tasks      | elapsed:  2.3min
[Parallel(n_jobs=20)]: Done 24860 tasks      | elapsed:  2.6min
[Parallel(n_jobs=20)]: Done 27560 tasks      | elapsed:  2.8min
[Parallel(n_jobs=20)]: Done 30460 tasks      | elapsed:  3.1min
[Parallel(n_jobs=20)]: Done 33560 tasks      | elapsed:  3.4min
[Parallel(n_jobs=20)]: Done 36860 tasks      | elapsed:  3.8min
[Parallel(n_jobs=20)]: Done 40360 tasks      | elapsed:  4.1min
[Parallel(n_jobs=20)]: Done 44060 tasks      | elapsed:  4.5min
[Parallel(n_jobs=20)]: Done 47960 tasks      | elapsed:  4.8min
[Parallel(n_jobs=20)]: Done 52060 tasks      | elapsed:  5.2min
[Parallel(n_jobs=20)]: Done 56360 tasks      | elapsed:  5.7min
[Parallel(n_jobs=20)]: Done 60860 tasks      | elapsed:  6.1min
[Parallel(n_jobs=20)]: Done 65560 tasks      | elapsed:  6.6min
[Parallel(n_jobs=20)]: Done 70460 tasks      | elapsed:  7.0min
[Parallel(n_jobs=20)]: Done 75560 tasks      | elapsed:  7.5min
[Parallel(n_jobs=20)]: Done 80860 tasks      | elapsed:  8.1min
[Parallel(n_jobs=20)]: Done 86360 tasks      | elapsed:  8.6min
[Parallel(n_jobs=20)]: Done 92060 tasks      | elapsed:  9.2min
[Parallel(n_jobs=20)]: Done 97960 tasks      | elapsed:  9.7min
[Parallel(n_jobs=20)]: Done 104060 tasks      | elapsed: 10.3min
[Parallel(n_jobs=20)]: Done 110360 tasks      | elapsed: 11.0min
[Parallel(n_jobs=20)]: Done 116860 tasks      | elapsed: 11.6min
[Parallel(n_jobs=20)]: Done 123560 tasks      | elapsed: 12.2min
[Parallel(n_jobs=20)]: Done 130460 tasks      | elapsed: 12.9min
[Parallel(n_jobs=20)]: Done 137560 tasks      | elapsed: 13.5min
[Parallel(n_jobs=20)]: Done 144860 tasks      | elapsed: 14.1min
[Parallel(n_jobs=20)]: Done 152360 tasks      | elapsed: 14.8min
[Parallel(n_jobs=20)]: Done 160060 tasks      | elapsed: 15.5min
[Parallel(n_jobs=20)]: Done 167960 tasks      | elapsed: 16.2min
[Parallel(n_jobs=20)]: Done 176060 tasks      | elapsed: 16.9min
[Parallel(n_jobs=20)]: Done 184360 tasks      | elapsed: 17.6min
[Parallel(n_jobs=20)]: Done 192860 tasks      | elapsed: 18.3min
[Parallel(n_jobs=20)]: Done 201560 tasks      | elapsed: 19.1min
[Parallel(n_jobs=20)]: Done 210460 tasks      | elapsed: 19.9min
[Parallel(n_jobs=20)]: Done 219560 tasks      | elapsed: 20.7min
[Parallel(n_jobs=20)]: Done 228860 tasks      | elapsed: 21.5min
[Parallel(n_jobs=20)]: Done 238360 tasks      | elapsed: 22.3min
[Parallel(n_jobs=20)]: Done 248060 tasks      | elapsed: 23.1min
[Parallel(n_jobs=20)]: Done 257960 tasks      | elapsed: 24.0min
[Parallel(n_jobs=20)]: Done 268060 tasks      | elapsed: 24.8min
[Parallel(n_jobs=20)]: Done 278360 tasks      | elapsed: 25.7min
[Parallel(n_jobs=20)]: Done 288860 tasks      | elapsed: 26.6min
[Parallel(n_jobs=20)]: Done 289166 out of 289205 | elapsed: 26.7min remaining:    0.2s
[Parallel(n_jobs=20)]: Done 289205 out of 289205 | elapsed: 26.7min finished
Working on valid split...
Loading filenames...
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.1s
[Parallel(n_jobs=20)]: Done 280 tasks      | elapsed:    1.6s
[Parallel(n_jobs=20)]: Done 780 tasks      | elapsed:    4.2s
[Parallel(n_jobs=20)]: Done 1480 tasks      | elapsed:    7.9s
[Parallel(n_jobs=20)]: Done 2380 tasks      | elapsed:   12.7s
[Parallel(n_jobs=20)]: Done 3480 tasks      | elapsed:   18.4s
[Parallel(n_jobs=20)]: Done 4780 tasks      | elapsed:   25.2s
[Parallel(n_jobs=20)]: Done 6280 tasks      | elapsed:   33.0s
[Parallel(n_jobs=20)]: Done 7980 tasks      | elapsed:   41.8s
[Parallel(n_jobs=20)]: Done 9880 tasks      | elapsed:   51.6s
[Parallel(n_jobs=20)]: Done 11980 tasks      | elapsed:  1.0min
[Parallel(n_jobs=20)]: Done 12678 out of 12678 | elapsed:  1.1min finished
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers.
[Parallel(n_jobs=20)]: Done  10 tasks      | elapsed:    0.1s
Working on test split...
Loading filenames...
[Parallel(n_jobs=20)]: Done 280 tasks      | elapsed:    1.7s
[Parallel(n_jobs=20)]: Done 780 tasks      | elapsed:    4.3s
[Parallel(n_jobs=20)]: Done 1480 tasks      | elapsed:    8.0s
[Parallel(n_jobs=20)]: Done 2380 tasks      | elapsed:   12.7s
[Parallel(n_jobs=20)]: Done 3480 tasks      | elapsed:   18.5s
[Parallel(n_jobs=20)]: Done 4096 out of 4096 | elapsed:   21.5s finished
In [ ]: