esc50.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import csv
import os
from typing import List, Tuple

from paddle.dataset.common import DATA_HOME
from paddle.utils import download

from .dataset import AudioClassificationDataset

__all__ = []
  21. class ESC50(AudioClassificationDataset):
  22. """
  23. The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings
  24. suitable for benchmarking methods of environmental sound classification. The dataset
  25. consists of 5-second-long recordings organized into 50 semantical classes (with
  26. 40 examples per class)
  27. Reference:
  28. ESC: Dataset for Environmental Sound Classification
  29. http://dx.doi.org/10.1145/2733373.2806390
  30. Args:
  31. mode (str, optional): It identifies the dataset mode (train or dev). Default:train.
  32. split (int, optional): It specify the fold of dev dataset. Default:1.
  33. feat_type (str, optional): It identifies the feature type that user wants to extract of an audio file. Default:raw.
  34. archive(dict, optional): it tells where to download the audio archive. Default:None.
  35. Returns:
  36. :ref:`api_paddle_io_Dataset`. An instance of ESC50 dataset.
  37. Examples:
  38. .. code-block:: python
  39. >>> import paddle
  40. >>> mode = 'dev'
  41. >>> esc50_dataset = paddle.audio.datasets.ESC50(mode=mode,
  42. ... feat_type='raw')
  43. >>> for idx in range(5):
  44. ... audio, label = esc50_dataset[idx]
  45. ... # do something with audio, label
  46. ... print(audio.shape, label)
  47. ... # [audio_data_length] , label_id
  48. [220500] 0
  49. [220500] 14
  50. [220500] 36
  51. [220500] 36
  52. [220500] 19
  53. >>> esc50_dataset = paddle.audio.datasets.ESC50(mode=mode,
  54. ... feat_type='mfcc',
  55. ... n_mfcc=40)
  56. >>> for idx in range(5):
  57. ... audio, label = esc50_dataset[idx]
  58. ... # do something with mfcc feature, label
  59. ... print(audio.shape, label)
  60. ... # [feature_dim, length] , label_id
  61. [40, 1723] 0
  62. [40, 1723] 14
  63. [40, 1723] 36
  64. [40, 1723] 36
  65. [40, 1723] 19
  66. """
  67. archive = {
  68. 'url': 'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
  69. 'md5': '7771e4b9d86d0945acce719c7a59305a',
  70. }
  71. label_list = [
  72. # Animals
  73. 'Dog',
  74. 'Rooster',
  75. 'Pig',
  76. 'Cow',
  77. 'Frog',
  78. 'Cat',
  79. 'Hen',
  80. 'Insects (flying)',
  81. 'Sheep',
  82. 'Crow',
  83. # Natural soundscapes & water sounds
  84. 'Rain',
  85. 'Sea waves',
  86. 'Crackling fire',
  87. 'Crickets',
  88. 'Chirping birds',
  89. 'Water drops',
  90. 'Wind',
  91. 'Pouring water',
  92. 'Toilet flush',
  93. 'Thunderstorm',
  94. # Human, non-speech sounds
  95. 'Crying baby',
  96. 'Sneezing',
  97. 'Clapping',
  98. 'Breathing',
  99. 'Coughing',
  100. 'Footsteps',
  101. 'Laughing',
  102. 'Brushing teeth',
  103. 'Snoring',
  104. 'Drinking, sipping',
  105. # Interior/domestic sounds
  106. 'Door knock',
  107. 'Mouse click',
  108. 'Keyboard typing',
  109. 'Door, wood creaks',
  110. 'Can opening',
  111. 'Washing machine',
  112. 'Vacuum cleaner',
  113. 'Clock alarm',
  114. 'Clock tick',
  115. 'Glass breaking',
  116. # Exterior/urban noises
  117. 'Helicopter',
  118. 'Chainsaw',
  119. 'Siren',
  120. 'Car horn',
  121. 'Engine',
  122. 'Train',
  123. 'Church bells',
  124. 'Airplane',
  125. 'Fireworks',
  126. 'Hand saw',
  127. ]
  128. meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
  129. meta_info = collections.namedtuple(
  130. 'META_INFO',
  131. ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'),
  132. )
  133. audio_path = os.path.join('ESC-50-master', 'audio')
  134. def __init__(
  135. self,
  136. mode: str = 'train',
  137. split: int = 1,
  138. feat_type: str = 'raw',
  139. archive=None,
  140. **kwargs,
  141. ):
  142. assert split in range(
  143. 1, 6
  144. ), f'The selected split should be integer, and 1 <= split <= 5, but got {split}'
  145. if archive is not None:
  146. self.archive = archive
  147. files, labels = self._get_data(mode, split)
  148. super().__init__(
  149. files=files, labels=labels, feat_type=feat_type, **kwargs
  150. )
  151. def _get_meta_info(self) -> List[collections.namedtuple]:
  152. ret = []
  153. with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
  154. for line in rf.readlines()[1:]:
  155. ret.append(self.meta_info(*line.strip().split(',')))
  156. return ret
  157. def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
  158. if not os.path.isdir(
  159. os.path.join(DATA_HOME, self.audio_path)
  160. ) or not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
  161. download.get_path_from_url(
  162. self.archive['url'],
  163. DATA_HOME,
  164. self.archive['md5'],
  165. decompress=True,
  166. )
  167. meta_info = self._get_meta_info()
  168. files = []
  169. labels = []
  170. for sample in meta_info:
  171. filename, fold, target, _, _, _, _ = sample
  172. if mode == 'train' and int(fold) != split:
  173. files.append(os.path.join(DATA_HOME, self.audio_path, filename))
  174. labels.append(int(target))
  175. if mode != 'train' and int(fold) == split:
  176. files.append(os.path.join(DATA_HOME, self.audio_path, filename))
  177. labels.append(int(target))
  178. return files, labels