In [1]:
import json
import pathlib
import numpy as np
import librosa
import tqdm
import joblib
In [2]:
# Target sample rate in Hz; compute_melspec passes it to both librosa.load and
# the mel-spectrogram call, so every clip is analysed at this rate.
sampling_rate = 8_000
In [12]:
def compute_melspec(filename, n_mels=64, n_frames=64):
    """Load an audio file and return a fixed-size, normalized log-mel spectrogram.

    The audio is loaded at the module-level ``sampling_rate``, converted to a
    mel power spectrogram, expressed in dB relative to its own maximum, and
    rescaled so that -80 dB maps to 0.0 and 0 dB maps to 1.0.

    Args:
        filename: Path to the audio file, passed straight to ``librosa.load``.
        n_mels: Number of mel bands (rows of the output). Defaults to 64.
        n_frames: Number of time frames (columns of the output). Longer
            spectrograms are cropped at the end, shorter ones are zero-padded
            at the end. Defaults to 64.

    Returns:
        Array of shape ``(1, n_mels, n_frames)`` with values in ``[0, 1]``.
    """
    waveform, _ = librosa.load(filename, sr=sampling_rate)
    melspec = librosa.feature.melspectrogram(
        y=waveform, sr=sampling_rate, n_mels=n_mels, fmax=4000
    )
    melspec = librosa.power_to_db(melspec, ref=np.max)
    # Normalize the mel spectrogram: [-80 dB, 0 dB] -> [0, 1].
    melspec = (melspec.clip(min=-80, max=0) + 80) / 80
    # Crop first so that clips with more than n_frames frames no longer produce
    # a negative pad width (which used to raise ValueError in np.zeros).
    melspec = melspec[:, :n_frames]
    missing_frames = n_frames - melspec.shape[1]
    if missing_frames > 0:
        # Pad value 0.0 equals the normalized -80 dB floor.
        melspec = np.pad(melspec, ((0, 0), (0, missing_frames)))
    return melspec.reshape(1, n_mels, n_frames)
In [4]:
# Absolute path of the "data" directory, resolved against the current working directory.
data_dir = pathlib.Path("data").resolve()
# Output location for the spectrogram arrays written by this notebook.
processed_dir = data_dir.joinpath("nsynth_processed_64x64")
In [5]:
# parents=True so the cell does not raise FileNotFoundError when the parent
# directory has not been created yet; exist_ok=True keeps reruns harmless.
processed_dir.mkdir(parents=True, exist_ok=True)
In [6]:
def process(filename, out_dir):
    """Compute the mel spectrogram of one audio file and cache it as ``.npy``.

    The output is written to ``out_dir / f"{filename.stem}.npy"``. If that file
    already exists the call is a no-op, which makes interrupted runs resumable.

    The array is first written to a sibling ``.tmp`` file and then renamed into
    place, so a crash or interrupt mid-write can never leave a truncated
    ``.npy`` that later runs would mistake for a finished result.

    Args:
        filename: ``pathlib.Path`` of the input audio file.
        out_dir: ``pathlib.Path`` of an existing directory for the output.

    Returns:
        None.
    """
    out_filename = out_dir / f"{filename.stem}.npy"
    if out_filename.is_file():
        return
    melspec = compute_melspec(filename)
    tmp_filename = out_filename.with_name(out_filename.name + ".tmp")
    try:
        # Saving through a file object stops np.save from appending ".npy"
        # to the temporary name.
        with open(tmp_filename, "wb") as f:
            np.save(f, melspec)
        # Atomic on the same filesystem: readers see either nothing or the full file.
        tmp_filename.replace(out_filename)
    except BaseException:
        # Do not leave partial temp files behind, then let the error propagate.
        if tmp_filename.exists():
            tmp_filename.unlink()
        raise
In [8]:
# Read the metadata JSON of the train split into `data`.
# NOTE(review): the split-processing loop later in this notebook reassigns
# `data` for every split, so this cell looks like leftover exploration — confirm
# before removing.
with data_dir.joinpath("nsynth", "nsynth-train", "examples.json").open() as f:
    data = json.load(f)
In [13]:
# Convert every split: read its examples.json, build the list of .wav paths
# from the JSON keys, and run `process` on each file in parallel.
for key in ("train", "valid", "test"):
    print(f"Working on {key} split...")
    audio_dir = data_dir / "nsynth" / f"nsynth-{key}" / "audio"
    print("Loading filenames...")
    with open(data_dir / "nsynth" / f"nsynth-{key}" / "examples.json") as f:
        data = json.load(f)
    # Each top-level JSON key is used as the stem of one .wav file. The
    # comprehension variable has its own name so it no longer shadows the
    # split name `key` of the enclosing loop.
    filenames = [audio_dir / f"{example_id}.wav" for example_id in data]
    out_dir = processed_dir / key
    # parents=True keeps this cell working even if processed_dir was not created beforehand.
    out_dir.mkdir(parents=True, exist_ok=True)
    # `process` skips files whose output already exists, so reruns are cheap.
    joblib.Parallel(n_jobs=20, verbose=1)(
        joblib.delayed(process)(filename, out_dir) for filename in filenames
    )
Working on train split... Loading filenames...
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers. [Parallel(n_jobs=20)]: Done 10 tasks | elapsed: 4.9s [Parallel(n_jobs=20)]: Done 260 tasks | elapsed: 5.2s [Parallel(n_jobs=20)]: Done 8385 tasks | elapsed: 1.1min [Parallel(n_jobs=20)]: Done 9370 tasks | elapsed: 1.2min [Parallel(n_jobs=20)]: Done 11090 tasks | elapsed: 1.2min [Parallel(n_jobs=20)]: Done 11640 tasks | elapsed: 1.3min [Parallel(n_jobs=20)]: Done 12860 tasks | elapsed: 1.4min [Parallel(n_jobs=20)]: Done 14360 tasks | elapsed: 1.6min [Parallel(n_jobs=20)]: Done 16060 tasks | elapsed: 1.7min [Parallel(n_jobs=20)]: Done 17960 tasks | elapsed: 1.9min [Parallel(n_jobs=20)]: Done 20060 tasks | elapsed: 2.1min [Parallel(n_jobs=20)]: Done 22360 tasks | elapsed: 2.3min [Parallel(n_jobs=20)]: Done 24860 tasks | elapsed: 2.6min [Parallel(n_jobs=20)]: Done 27560 tasks | elapsed: 2.8min [Parallel(n_jobs=20)]: Done 30460 tasks | elapsed: 3.1min [Parallel(n_jobs=20)]: Done 33560 tasks | elapsed: 3.4min [Parallel(n_jobs=20)]: Done 36860 tasks | elapsed: 3.8min [Parallel(n_jobs=20)]: Done 40360 tasks | elapsed: 4.1min [Parallel(n_jobs=20)]: Done 44060 tasks | elapsed: 4.5min [Parallel(n_jobs=20)]: Done 47960 tasks | elapsed: 4.8min [Parallel(n_jobs=20)]: Done 52060 tasks | elapsed: 5.2min [Parallel(n_jobs=20)]: Done 56360 tasks | elapsed: 5.7min [Parallel(n_jobs=20)]: Done 60860 tasks | elapsed: 6.1min [Parallel(n_jobs=20)]: Done 65560 tasks | elapsed: 6.6min [Parallel(n_jobs=20)]: Done 70460 tasks | elapsed: 7.0min [Parallel(n_jobs=20)]: Done 75560 tasks | elapsed: 7.5min [Parallel(n_jobs=20)]: Done 80860 tasks | elapsed: 8.1min [Parallel(n_jobs=20)]: Done 86360 tasks | elapsed: 8.6min [Parallel(n_jobs=20)]: Done 92060 tasks | elapsed: 9.2min [Parallel(n_jobs=20)]: Done 97960 tasks | elapsed: 9.7min [Parallel(n_jobs=20)]: Done 104060 tasks | elapsed: 10.3min [Parallel(n_jobs=20)]: Done 110360 tasks | elapsed: 11.0min [Parallel(n_jobs=20)]: Done 116860 tasks | elapsed: 11.6min 
[Parallel(n_jobs=20)]: Done 123560 tasks | elapsed: 12.2min [Parallel(n_jobs=20)]: Done 130460 tasks | elapsed: 12.9min [Parallel(n_jobs=20)]: Done 137560 tasks | elapsed: 13.5min [Parallel(n_jobs=20)]: Done 144860 tasks | elapsed: 14.1min [Parallel(n_jobs=20)]: Done 152360 tasks | elapsed: 14.8min [Parallel(n_jobs=20)]: Done 160060 tasks | elapsed: 15.5min [Parallel(n_jobs=20)]: Done 167960 tasks | elapsed: 16.2min [Parallel(n_jobs=20)]: Done 176060 tasks | elapsed: 16.9min [Parallel(n_jobs=20)]: Done 184360 tasks | elapsed: 17.6min [Parallel(n_jobs=20)]: Done 192860 tasks | elapsed: 18.3min [Parallel(n_jobs=20)]: Done 201560 tasks | elapsed: 19.1min [Parallel(n_jobs=20)]: Done 210460 tasks | elapsed: 19.9min [Parallel(n_jobs=20)]: Done 219560 tasks | elapsed: 20.7min [Parallel(n_jobs=20)]: Done 228860 tasks | elapsed: 21.5min [Parallel(n_jobs=20)]: Done 238360 tasks | elapsed: 22.3min [Parallel(n_jobs=20)]: Done 248060 tasks | elapsed: 23.1min [Parallel(n_jobs=20)]: Done 257960 tasks | elapsed: 24.0min [Parallel(n_jobs=20)]: Done 268060 tasks | elapsed: 24.8min [Parallel(n_jobs=20)]: Done 278360 tasks | elapsed: 25.7min [Parallel(n_jobs=20)]: Done 288860 tasks | elapsed: 26.6min [Parallel(n_jobs=20)]: Done 289166 out of 289205 | elapsed: 26.7min remaining: 0.2s [Parallel(n_jobs=20)]: Done 289205 out of 289205 | elapsed: 26.7min finished
Working on valid split... Loading filenames...
[Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers. [Parallel(n_jobs=20)]: Done 10 tasks | elapsed: 0.1s [Parallel(n_jobs=20)]: Done 280 tasks | elapsed: 1.6s [Parallel(n_jobs=20)]: Done 780 tasks | elapsed: 4.2s [Parallel(n_jobs=20)]: Done 1480 tasks | elapsed: 7.9s [Parallel(n_jobs=20)]: Done 2380 tasks | elapsed: 12.7s [Parallel(n_jobs=20)]: Done 3480 tasks | elapsed: 18.4s [Parallel(n_jobs=20)]: Done 4780 tasks | elapsed: 25.2s [Parallel(n_jobs=20)]: Done 6280 tasks | elapsed: 33.0s [Parallel(n_jobs=20)]: Done 7980 tasks | elapsed: 41.8s [Parallel(n_jobs=20)]: Done 9880 tasks | elapsed: 51.6s [Parallel(n_jobs=20)]: Done 11980 tasks | elapsed: 1.0min [Parallel(n_jobs=20)]: Done 12678 out of 12678 | elapsed: 1.1min finished [Parallel(n_jobs=20)]: Using backend LokyBackend with 20 concurrent workers. [Parallel(n_jobs=20)]: Done 10 tasks | elapsed: 0.1s
Working on test split... Loading filenames...
[Parallel(n_jobs=20)]: Done 280 tasks | elapsed: 1.7s [Parallel(n_jobs=20)]: Done 780 tasks | elapsed: 4.3s [Parallel(n_jobs=20)]: Done 1480 tasks | elapsed: 8.0s [Parallel(n_jobs=20)]: Done 2380 tasks | elapsed: 12.7s [Parallel(n_jobs=20)]: Done 3480 tasks | elapsed: 18.5s [Parallel(n_jobs=20)]: Done 4096 out of 4096 | elapsed: 21.5s finished
In [ ]: