input.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from paddle import _C_ops
  15. from ...base.data_feeder import check_variable_and_dtype
  16. from ...base.layer_helper import LayerHelper
  17. from ...common_ops_import import Variable
  18. from ...framework import in_dynamic_or_pir_mode
  19. __all__ = []
  20. def one_hot(x, num_classes, name=None):
  21. """
  22. The operator converts each id in the input `x` to an one-hot vector with a
  23. `num_classes` length. The value in the vector dimension corresponding to the id
  24. is 1, and the value in the remaining dimension is 0.
  25. The shape of output Tensor is generated by appending `num_classes` dimension
  26. behind the last dimension of the `x` shape.
  27. .. code-block:: text
  28. Example 1:
  29. input:
  30. x.shape = [4]
  31. x.data = [1, 1, 3, 0]
  32. num_classes = 4
  33. output:
  34. Out.shape = [4, 4]
  35. Out.data = [[0., 1., 0., 0.],
  36. [0., 1., 0., 0.],
  37. [0., 0., 0., 1.],
  38. [1., 0., 0., 0.]]
  39. Example 2:
  40. input:
  41. x.shape = [4]
  42. x.data = [1, 1, 5, 0]
  43. num_classes = 4
  44. output: Throw an exception for Illegal value
  45. The second dimension in X is 5, which is greater than num_classes,
  46. so it throws an exception.
  47. Args:
  48. x(Tensor): Tensor with shape :math:`[N_1, N_2, ..., N_k]` ,
  49. which contains at least one dimension. The data type is int32 or int64.
  50. num_classes(int): An integer defining the `num_classes` of the one hot dimension. If input `x`
  51. is word id, `num_classes` is generally the dictionary size.
  52. name(str|None, optional): For detailed information, please refer
  53. to :ref:`api_guide_Name`. Usually name is no need to set and
  54. None by default.
  55. Returns:
  56. Tensor: The one-hot representations of `x`. A Tensor with type float32.
  57. Examples:
  58. .. code-block:: python
  59. >>> import paddle
  60. >>> # Correspond to the first example above, where label.shape is 4 and one_hot_label.shape is [4, 4].
  61. >>> label = paddle.to_tensor([1, 1, 3, 0], dtype='int64')
  62. >>> print(label.shape)
  63. [4]
  64. >>> one_hot_label = paddle.nn.functional.one_hot(label, num_classes=4)
  65. >>> print(one_hot_label.shape)
  66. [4, 4]
  67. >>> print(one_hot_label)
  68. Tensor(shape=[4, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
  69. [[0., 1., 0., 0.],
  70. [0., 1., 0., 0.],
  71. [0., 0., 0., 1.],
  72. [1., 0., 0., 0.]])
  73. """
  74. if in_dynamic_or_pir_mode():
  75. return _C_ops.one_hot(x, num_classes)
  76. else:
  77. check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'one_hot_v2')
  78. helper = LayerHelper("one_hot_v2", **locals())
  79. one_hot_out = helper.create_variable_for_type_inference(dtype='float32')
  80. if not isinstance(num_classes, Variable):
  81. # user attribute
  82. inputs = {'X': x}
  83. attrs = {'depth': num_classes, 'allow_out_of_range': False}
  84. else:
  85. num_classes.stop_gradient = True
  86. inputs = {'X': x, 'depth_tensor': num_classes}
  87. attrs = {'allow_out_of_range': False}
  88. helper.append_op(
  89. type="one_hot_v2",
  90. inputs=inputs,
  91. attrs=attrs,
  92. outputs={'Out': one_hot_out},
  93. stop_gradient=True,
  94. )
  95. return one_hot_out
  96. def embedding(x, weight, padding_idx=None, sparse=False, name=None):
  97. r"""
  98. Used to lookup embeddings vector of ids provided by :attr:`x` .
  99. The shape of output Tensor is generated by appending the last dimension of the input Tensor shape
  100. with embedding size.
  101. Note:
  102. The id in :attr:`x` must satisfy :math:`0 =< id < weight.shape[0]` ,
  103. otherwise the program will throw an exception and exit.
  104. .. code-block:: text
  105. x is a Tensor.
  106. padding_idx = -1
  107. x.data = [[1, 3], [2, 4], [4, 127]]
  108. x.shape = [3, 2]
  109. weight.shape = [128, 16]
  110. output is a Tensor:
  111. out.shape = [3, 2, 16]
  112. out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
  113. [0.345421456, 0.524563927, ..., 0.144534654]],
  114. [[0.345249859, 0.124939536, ..., 0.194353745],
  115. [0.945345345, 0.435394634, ..., 0.435345365]],
  116. [[0.945345345, 0.435394634, ..., 0.435345365],
  117. [0.0, 0.0, ..., 0.0 ]]] # padding data
  118. The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127
  119. It will pad all-zero data when id is 127.
  120. Args:
  121. x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should
  122. satisfy :math:`0<= id < weight.shape[0]` .
  123. weight (Tensor): The weight. A Tensor with shape of lookup table parameter. It should have two elements which
  124. indicates the size of the dictionary of embeddings and the size of each embedding vector respectively.
  125. sparse(bool, optional): The flag indicating whether to use sparse update. This parameter only
  126. affects the performance of the backwards gradient update. It is recommended to set
  127. True because sparse update is faster. But some optimizers does not support sparse update,
  128. such as :ref:`api_paddle_optimizer_adadelta_Adadelta` , :ref:`api_paddle_optimizer_adamax_Adamax` , :ref:`api_paddle_optimizer_lamb_Lamb`.
  129. In these cases, sparse must be False. Default: False.
  130. padding_idx(int|long|None, optional): padding_idx needs to be in the interval [-weight.shape[0], weight.shape[0]).
  131. If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted
  132. to :math:`weight.shape[0] + padding\_idx` . It will output all-zero padding data whenever lookup
  133. encounters :math:`padding\_idx` in id. And the padding data will not be updated while training.
  134. If set None, it makes no effect to output. Default: None.
  135. name(str|None, optional): For detailed information, please refer
  136. to :ref:`api_guide_Name`. Usually name is no need to set and
  137. None by default.
  138. Returns:
  139. Tensor: Embedding Tensor mapped by x. The data type is the same as :attr:`weight`.
  140. Examples:
  141. .. code-block:: python
  142. >>> import paddle
  143. >>> import paddle.nn as nn
  144. >>> x0 = paddle.arange(3, 6).reshape((3, 1)).astype(paddle.int64)
  145. >>> w0 = paddle.full(shape=(10, 3), fill_value=2).astype(paddle.float32)
  146. >>> x = paddle.to_tensor(x0, stop_gradient=False)
  147. >>> print(x.numpy())
  148. [[3]
  149. [4]
  150. [5]]
  151. >>> print(x.shape)
  152. [3, 1]
  153. >>> w = paddle.to_tensor(w0, stop_gradient=False)
  154. >>> print(w.numpy())
  155. [[2. 2. 2.]
  156. [2. 2. 2.]
  157. [2. 2. 2.]
  158. [2. 2. 2.]
  159. [2. 2. 2.]
  160. [2. 2. 2.]
  161. [2. 2. 2.]
  162. [2. 2. 2.]
  163. [2. 2. 2.]
  164. [2. 2. 2.]]
  165. >>> print(w.shape)
  166. [10, 3]
  167. >>> emb = nn.functional.embedding(
  168. ... x=x, weight=w, sparse=True, name="embedding")
  169. >>> print(emb.numpy())
  170. [[[2. 2. 2.]]
  171. [[2. 2. 2.]]
  172. [[2. 2. 2.]]]
  173. >>> print(emb.shape)
  174. [3, 1, 3]
  175. """
  176. padding_idx = (
  177. -1
  178. if padding_idx is None
  179. else padding_idx
  180. if padding_idx >= 0
  181. else (weight.shape[0] + padding_idx)
  182. )
  183. if padding_idx >= weight.shape[0] or padding_idx < -weight.shape[0]:
  184. raise ValueError(
  185. f"padding_idx must be within [-{weight.shape[0]}, {weight.shape[0]})"
  186. )
  187. if in_dynamic_or_pir_mode():
  188. return _C_ops.embedding(x, weight, padding_idx, sparse)
  189. else:
  190. helper = LayerHelper('embedding', **locals())
  191. dtype = helper.input_dtype(input_param_name='weight')
  192. check_variable_and_dtype(
  193. x,
  194. 'input',
  195. ['uint8', 'int8', 'int16', 'int32', 'int64'],
  196. 'embedding',
  197. )
  198. is_distributed = False
  199. remote_prefetch = sparse and (not is_distributed)
  200. tmp = helper.create_variable_for_type_inference(dtype)
  201. helper.append_op(
  202. type='lookup_table_v2',
  203. inputs={'Ids': x, 'W': weight},
  204. outputs={'Out': tmp},
  205. attrs={
  206. 'is_sparse': sparse,
  207. 'is_distributed': is_distributed,
  208. 'remote_prefetch': remote_prefetch,
  209. 'padding_idx': padding_idx,
  210. },
  211. )
  212. return tmp