assign.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. from paddle import _C_ops
  16. from ...base import core, framework, unique_name
  17. from ...base.data_feeder import check_type
  18. from ...base.framework import (
  19. _current_expected_place,
  20. in_dygraph_mode,
  21. in_pir_mode,
  22. )
  23. from .initializer import Initializer
  24. __all__ = []
  class NumpyArrayInitializer(Initializer):
      """Initialize a parameter from a numpy array.

      The array held by the initializer is flattened into a Python list and
      written into the target tensor through the ``assign_value`` op (or its
      ``_C_ops`` equivalents in dygraph and PIR mode).

      Args:
          value (numpy.ndarray): numpy array to initialize the tensor

      Returns:
          A Tensor initialized by numpy.
      """

      def __init__(self, value):
          import numpy

          assert isinstance(value, numpy.ndarray)
          super().__init__()
          self._value = value

      def forward(self, var, block=None):
          """Initialize the input tensor with Numpy array.

          Args:
              var(Tensor): Tensor that needs to be initialized.
              block(Block, optional): The block in which initialization ops
                  should be added. Used in static graph only, default None.

          Returns:
              ``None`` in dygraph mode (``var`` is filled in place), the
              assigned (and possibly cast) value in PIR mode, and the
              ``assign_value`` op in legacy static graph mode.

          Raises:
              ValueError: If the stored array has more than 1024 ** 3
                  elements, or if the target dtype is not one of
                  float16/bfloat16/float32/float64/int32/int8/uint8.
          """
          assert not (
              isinstance(var, framework.EagerParamBase) and var.is_dist()
          ), "Currently, assign initializer not support lazy init for dist param."
          block = self._check_block(block)

          assert isinstance(
              var, (framework.Variable, paddle.pir.core.ParameterMeta)
          )
          assert isinstance(block, (framework.Block, paddle.pir.Block))

          # Reject oversized inputs up front, before the array is cast or
          # expanded into a Python list element by element.
          if self._value.size > 1024 * 1024 * 1024:
              raise ValueError(
                  "The size of input is too big. Please consider "
                  "saving it to file and 'load_op' to load it"
              )

          # to be compatible of fp16 initializers: half-precision targets are
          # filled with float32 data first and cast to var.dtype afterwards.
          if var.dtype in [core.VarDesc.VarType.FP16, core.VarDesc.VarType.BF16]:
              out_dtype = core.VarDesc.VarType.FP32
              np_value = self._value.astype("float32")
              # Temporary float32 variable that receives the assigned data.
              out_var = block.create_var(
                  name=unique_name.generate(
                      ".".join(['numpy_array_init', var.name, 'tmp'])
                  ),
                  shape=var.shape,
                  dtype=out_dtype,
                  type=core.VarDesc.VarType.LOD_TENSOR,
                  persistable=False,
              )
          elif var.dtype in [core.DataType.FLOAT16, core.DataType.BFLOAT16]:
              out_var = var
              out_dtype = core.DataType.FLOAT32
              np_value = self._value.astype("float32")
          else:
              out_var = var
              out_dtype = var.dtype
              np_value = self._value

          # Flatten the array into plain Python scalars; ``value_name`` is the
          # attribute key used when the op is appended in static graph mode.
          if out_dtype in (
              core.VarDesc.VarType.FP32,
              core.DataType.FLOAT32,
              core.VarDesc.VarType.FP64,
              core.DataType.FLOAT64,
          ):
              value_name = "values"
              values = [float(v) for v in np_value.flat]
          elif out_dtype in (core.VarDesc.VarType.INT32, core.DataType.INT32):
              value_name = "values"
              values = [int(v) for v in np_value.flat]
          elif out_dtype in (
              core.VarDesc.VarType.INT8,
              core.VarDesc.VarType.UINT8,
              core.DataType.INT8,
              core.DataType.UINT8,
          ):
              value_name = "int8_values"
              values = [int(v) for v in np_value.flat]
          else:
              # Format the dtype into the message; passing it as a second
              # positional argument would leave the "%s" unexpanded.
              raise ValueError(f"Unsupported dtype {self._value.dtype}")

          if in_dygraph_mode():
              _C_ops.assign_value_(
                  out_var,
                  list(self._value.shape),
                  out_dtype,
                  values,
                  _current_expected_place(),
              )
              if var.dtype in [
                  core.VarDesc.VarType.FP16,
                  core.VarDesc.VarType.BF16,
              ]:
                  # Cast the float32 temporary back to the half-precision
                  # dtype and hand its storage to ``var``.
                  var_tmp = _C_ops.cast(out_var, var.dtype)
                  var_tmp._share_underline_tensor_to(var)
              else:
                  out_var._share_underline_tensor_to(var)
              return None
          elif in_pir_mode():
              out_var = _C_ops.assign_value(
                  list(self._value.shape),
                  out_dtype,
                  values,
                  _current_expected_place(),
              )
              if var.dtype in [core.DataType.FLOAT16, core.DataType.BFLOAT16]:
                  out_var = _C_ops.cast(out_var, var.dtype)
              return out_var
          else:
              op = block.append_op(
                  type='assign_value',
                  outputs={'Out': out_var},
                  attrs={
                      'dtype': out_dtype,
                      'shape': list(self._value.shape),
                      value_name: values,
                  },
                  stop_gradient=True,
              )

              if var.dtype in [
                  core.VarDesc.VarType.FP16,
                  core.VarDesc.VarType.BF16,
              ]:
                  # Append a cast from the float32 temporary into ``var``.
                  block.append_op(
                      type="cast",
                      inputs={"X": out_var},
                      outputs={"Out": var},
                      attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype},
                  )

              var.op = op
              return op
  class Assign(NumpyArrayInitializer):
      """Initialize a parameter with a numpy array, list, tuple, or tensor.

      Lists and tuples are converted with ``numpy.array`` and tensors with
      their ``numpy`` method before being handed to the parent initializer.

      Args:
          value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or tensor to initialize the parameter.
          name(str, optional): Normally there is no need for user to set this
              property. For more information, please refer to :ref:`api_guide_Name`. Default is None.

      Returns:
          A parameter initialized by the input numpy array, list, or tensor.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> import numpy as np

              >>> # numpy array
              >>> data_1 = paddle.ones(shape=[1, 2], dtype='float32')
              >>> weight_attr_1 = paddle.ParamAttr(
              ...     name="linear_weight_1",
              ...     initializer=paddle.nn.initializer.Assign(np.array([2, 2])))
              >>> bias_attr_1 = paddle.ParamAttr(
              ...     name="linear_bias_1",
              ...     initializer=paddle.nn.initializer.Assign(np.array([2])))
              >>> linear_1 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_1, bias_attr=bias_attr_1)
              >>> print(linear_1.weight.numpy())
              [2. 2.]
              >>> print(linear_1.bias.numpy())
              [2.]

              >>> res_1 = linear_1(data_1)
              >>> print(res_1.numpy())
              [6.]

              >>> # python list
              >>> data_2 = paddle.ones(shape=[1, 2], dtype='float32')
              >>> weight_attr_2 = paddle.ParamAttr(
              ...     name="linear_weight_2",
              ...     initializer=paddle.nn.initializer.Assign([2, 2]))
              >>> bias_attr_2 = paddle.ParamAttr(
              ...     name="linear_bias_2",
              ...     initializer=paddle.nn.initializer.Assign([2]))
              >>> linear_2 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_2, bias_attr=bias_attr_2)
              >>> print(linear_2.weight.numpy())
              [2. 2.]
              >>> print(linear_2.bias.numpy())
              [2.]

              >>> res_2 = linear_2(data_2)
              >>> print(res_2.numpy())
              [6.]

              >>> # tensor
              >>> data_3 = paddle.ones(shape=[1, 2], dtype='float32')
              >>> weight_attr_3 = paddle.ParamAttr(
              ...     name="linear_weight_3",
              ...     initializer=paddle.nn.initializer.Assign(paddle.full([2], 2)))
              >>> bias_attr_3 = paddle.ParamAttr(
              ...     name="linear_bias_3",
              ...     initializer=paddle.nn.initializer.Assign(paddle.full([1], 2)))
              >>> linear_3 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_3, bias_attr=bias_attr_3)
              >>> print(linear_3.weight.numpy())
              [2. 2.]
              >>> print(linear_3.bias.numpy())
              [2.]

              >>> res_3 = linear_3(data_3)
              >>> print(res_3.numpy())
              [6.]
      """

      def __init__(self, value, name=None):
          import numpy

          accepted_types = (numpy.ndarray, list, tuple, paddle.static.Variable)
          check_type(value, 'value', accepted_types, 'Assign')

          # Normalise every accepted input kind to a numpy array; anything
          # else is passed through for the parent constructor to validate.
          if isinstance(value, (list, tuple)):
              array = numpy.array(value)
          elif isinstance(value, paddle.static.Variable):
              # TODO: when value is already a tensor, converting it to numpy
              # data and initializing from that may be inefficient; consider
              # initializing from the tensor directly.
              array = value.numpy(False)
          else:
              array = value

          super().__init__(array)