"""Lakh MIDI Dataset."""
from pathlib import Path
from typing import Union
from ..inputs import read_midi
from ..music import Music
from .base import DatasetInfo, RemoteFolderDataset
# pylint: disable=line-too-long
_NAME = "Lakh MIDI Dataset"
# Single-line description built from adjacent string literals; every fragment
# except the last ends with the space that separates it from the next one.
_DESCRIPTION = (
    "The Lakh MIDI dataset is a collection of 176,581 unique MIDI files, 45,129 of "
    "which have been matched and aligned to entries in the Million Song Dataset. "
    "Its goal is to facilitate large-scale music information retrieval, both "
    "symbolic (using the MIDI files alone) and audio content-based (using "
    "information extracted from the MIDI files as annotations for the matched "
    "audio files)."
)
_HOMEPAGE = "https://colinraffel.com/projects/lmd/"
_LICENSE = "Creative Commons Attribution 4.0 International License (CC-By 4.0)"
# BibTeX entry, one element per output line, joined without a trailing newline.
_CITATION = "\n".join(
    (
        "@phdthesis{raffel2016learning,",
        "author={Colin Raffel},",
        "title={Learning-Based Methods for Comparing Sequences, with Applications to Audio-to-{MIDI} Alignment and Matching},",
        "year=2016",
        "}",
    )
)
class LakhMIDIDataset(RemoteFolderDataset):
    """Lakh MIDI Dataset (the ``lmd_full`` archive).

    The class attributes declare the dataset metadata and a single remote
    source, ``lmd_full.tar.gz``. How these attributes are consumed is
    defined by ``RemoteFolderDataset``, which lives outside this module.
    """

    _info = DatasetInfo(_NAME, _DESCRIPTION, _HOMEPAGE, _LICENSE)
    _citation = _CITATION
    # One remote source, flagged as an archive. "size" is presumably the
    # download size in bytes -- TODO confirm against the base class.
    _sources = {
        "lmd_full": {
            "filename": "lmd_full.tar.gz",
            "url": "http://hog.ee.columbia.edu/craffel/lmd/lmd_full.tar.gz",
            "archive": True,
            "size": 1768163879,
            "md5": "2536ce3fd2cede53ddaa264f731859ab",
            "sha256": "6fcfe2ac49ca08f3f214cec86ab138d4fc4dabcd7f27f491a838dae6db45a12b",
        },
    }
    # Presumably the file extension the base class uses to select files --
    # verify against RemoteFolderDataset.
    _extension = "mid"

    def read(self, filename: Union[str, Path]) -> Music:
        """Read a file into a Music object.

        Parameters
        ----------
        filename : str or Path
            Path that is joined onto ``self.root`` to locate the file.

        Returns
        -------
        Music
            Whatever ``read_midi`` returns for the joined path.
        """
        return read_midi(self.root / filename)
class LakhMIDIMatchedDataset(RemoteFolderDataset):
    """Lakh MIDI Dataset - matched subset (the ``lmd_matched`` archive).

    The class attributes declare the dataset metadata and a single remote
    source, ``lmd_matched.tar.gz``. How these attributes are consumed is
    defined by ``RemoteFolderDataset``, which lives outside this module.
    """

    _info = DatasetInfo(_NAME, _DESCRIPTION, _HOMEPAGE, _LICENSE)
    _citation = _CITATION
    # One remote source, flagged as an archive. "size" is presumably the
    # download size in bytes -- TODO confirm against the base class.
    _sources = {
        "lmd_matched": {
            "filename": "lmd_matched.tar.gz",
            "url": "http://hog.ee.columbia.edu/craffel/lmd/lmd_matched.tar.gz",
            "archive": True,
            "size": 1407072670,
            "md5": "fb80d01c22020295bb3eeef31f1aa63a",
            "sha256": "621ff830aed771f469e5bfa13dc12a33c6ed69090adeda63d0b5c47783af0191",
        },
    }
    # Presumably the file extension the base class uses to select files --
    # verify against RemoteFolderDataset.
    _extension = "mid"

    def read(self, filename: Union[str, Path]) -> Music:
        """Read a file into a Music object.

        Parameters
        ----------
        filename : str or Path
            Path that is joined onto ``self.root`` to locate the file.

        Returns
        -------
        Music
            Whatever ``read_midi`` returns for the joined path.
        """
        return read_midi(self.root / filename)
class LakhMIDIAlignedDataset(RemoteFolderDataset):
    """Lakh MIDI Dataset - aligned subset (the ``lmd_aligned`` archive).

    The class attributes declare the dataset metadata and a single remote
    source, ``lmd_aligned.tar.gz``. How these attributes are consumed is
    defined by ``RemoteFolderDataset``, which lives outside this module.
    """

    _info = DatasetInfo(_NAME, _DESCRIPTION, _HOMEPAGE, _LICENSE)
    _citation = _CITATION
    # One remote source, flagged as an archive. "size" is presumably the
    # download size in bytes -- TODO confirm against the base class.
    _sources = {
        "lmd_aligned": {
            "filename": "lmd_aligned.tar.gz",
            "url": "http://hog.ee.columbia.edu/craffel/lmd/lmd_aligned.tar.gz",
            "archive": True,
            "size": 272169548,
            "md5": "d36ca9159966d81d97e1e37d10ed4584",
            "sha256": "2bf5400e82eba73204644946515489b68811e1e656b0cfd854efc14377f6e53b",
        },
    }
    # Presumably the file extension the base class uses to select files --
    # verify against RemoteFolderDataset.
    _extension = "mid"

    def read(self, filename: Union[str, Path]) -> Music:
        """Read a file into a Music object.

        Parameters
        ----------
        filename : str or Path
            Path that is joined onto ``self.root`` to locate the file.

        Returns
        -------
        Music
            Whatever ``read_midi`` returns for the joined path.
        """
        return read_midi(self.root / filename)