backend.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License
  14. from pathlib import Path
  15. from typing import Optional, Tuple, Union
  16. import paddle
  17. class AudioInfo:
  18. """Audio info, return type of backend info function"""
  19. def __init__(
  20. self,
  21. sample_rate: int,
  22. num_samples: int,
  23. num_channels: int,
  24. bits_per_sample: int,
  25. encoding: str,
  26. ):
  27. self.sample_rate = sample_rate
  28. self.num_samples = num_samples
  29. self.num_channels = num_channels
  30. self.bits_per_sample = bits_per_sample
  31. self.encoding = encoding
  32. def info(filepath: str) -> AudioInfo:
  33. """Get signal information of input audio file.
  34. Args:
  35. filepath: audio path or file object.
  36. Returns:
  37. AudioInfo: info of the given audio.
  38. Example:
  39. .. code-block:: python
  40. >>> import os
  41. >>> import paddle
  42. >>> sample_rate = 16000
  43. >>> wav_duration = 0.5
  44. >>> num_channels = 1
  45. >>> num_frames = sample_rate * wav_duration
  46. >>> wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
  47. >>> waveform = wav_data.tile([num_channels, 1])
  48. >>> base_dir = os.getcwd()
  49. >>> filepath = os.path.join(base_dir, "test.wav")
  50. >>> paddle.audio.save(filepath, waveform, sample_rate)
  51. >>> wav_info = paddle.audio.info(filepath)
  52. """
  53. # for API doc
  54. raise NotImplementedError("please set audio backend")
  55. def load(
  56. filepath: Union[str, Path],
  57. frame_offset: int = 0,
  58. num_frames: int = -1,
  59. normalize: bool = True,
  60. channels_first: bool = True,
  61. ) -> Tuple[paddle.Tensor, int]:
  62. """Load audio data from file.Load the audio content start form frame_offset, and get num_frames.
  63. Args:
  64. frame_offset: from 0 to total frames,
  65. num_frames: from -1 (means total frames) or number frames which want to read,
  66. normalize:
  67. if True: return audio which norm to (-1, 1), dtype=float32
  68. if False: return audio with raw data, dtype=int16
  69. channels_first:
  70. if True: return audio with shape (channels, time)
  71. Return:
  72. Tuple[paddle.Tensor, int]: (audio_content, sample rate)
  73. Examples:
  74. .. code-block:: python
  75. >>> import os
  76. >>> import paddle
  77. >>> sample_rate = 16000
  78. >>> wav_duration = 0.5
  79. >>> num_channels = 1
  80. >>> num_frames = sample_rate * wav_duration
  81. >>> wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
  82. >>> waveform = wav_data.tile([num_channels, 1])
  83. >>> base_dir = os.getcwd()
  84. >>> filepath = os.path.join(base_dir, "test.wav")
  85. >>> paddle.audio.save(filepath, waveform, sample_rate)
  86. >>> wav_data_read, sr = paddle.audio.load(filepath)
  87. """
  88. # for API doc
  89. raise NotImplementedError("please set audio backend")
  90. def save(
  91. filepath: str,
  92. src: paddle.Tensor,
  93. sample_rate: int,
  94. channels_first: bool = True,
  95. encoding: Optional[str] = None,
  96. bits_per_sample: Optional[int] = 16,
  97. ):
  98. """
  99. Save audio tensor to file.
  100. Args:
  101. filepath: saved path
  102. src: the audio tensor
  103. sample_rate: the number of samples of audio per second.
  104. channels_first: src channel information
  105. if True, means input tensor is (channels, time)
  106. if False, means input tensor is (time, channels)
  107. encoding:encoding format, wave_backend only support PCM16 now.
  108. bits_per_sample: bits per sample, wave_backend only support 16 bits now.
  109. Returns:
  110. None
  111. Examples:
  112. .. code-block:: python
  113. >>> import paddle
  114. >>> sample_rate = 16000
  115. >>> wav_duration = 0.5
  116. >>> num_channels = 1
  117. >>> num_frames = sample_rate * wav_duration
  118. >>> wav_data = paddle.linspace(-1.0, 1.0, num_frames) * 0.1
  119. >>> waveform = wav_data.tile([num_channels, 1])
  120. >>> filepath = "./test.wav"
  121. >>> paddle.audio.save(filepath, waveform, sample_rate)
  122. """
  123. # for API doc
  124. raise NotImplementedError("please set audio backend")