layer_helper.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import copy
  15. import paddle
  16. from paddle import _C_ops
  17. from . import unique_name
  18. from .dygraph_utils import _append_activation_in_dygraph
  19. from .framework import (
  20. Parameter,
  21. dtype_is_floating,
  22. in_dygraph_mode,
  23. in_pir_mode,
  24. )
  25. from .layer_helper_base import LayerHelperBase
  26. from .param_attr import ParamAttr
  27. class LayerHelper(LayerHelperBase):
  28. def __init__(self, layer_type, **kwargs):
  29. self.kwargs = kwargs
  30. name = self.kwargs.get('name', None)
  31. # TODO(panyx0718, minqiyang): dygraph mode
  32. # can not use both `layer_type` and `name`. Deprecate LayerHelper
  33. # and write a Helper for dygraph mode.
  34. if name is None:
  35. if in_dygraph_mode():
  36. self.kwargs['name'] = unique_name.generate(layer_type)
  37. else:
  38. self.kwargs[
  39. 'name'
  40. ] = self.main_program._name_generator.generate(layer_type)
  41. super().__init__(self.kwargs['name'], layer_type=layer_type)
  42. def append_op(self, *args, **kwargs):
  43. return self.main_program.current_block().append_op(*args, **kwargs)
  44. def multiple_input(self, input_param_name='input'):
  45. inputs = self.kwargs.get(input_param_name, [])
  46. ret = []
  47. if isinstance(inputs, (list, tuple)):
  48. for inp in inputs:
  49. ret.append(self.to_variable(inp))
  50. else:
  51. ret.append(self.to_variable(inputs))
  52. return ret
  53. def input(self, input_param_name='input'):
  54. inputs = self.multiple_input(input_param_name)
  55. if len(inputs) != 1:
  56. raise f"{self.layer_type} layer only takes one input"
  57. return inputs[0]
  58. @property
  59. def param_attr(self):
  60. return ParamAttr._to_attr(self.kwargs.get('param_attr', None))
  61. @property
  62. def bias_attr(self):
  63. return ParamAttr._to_attr(self.kwargs.get('bias_attr', None))
  64. # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of param_attr
  65. def multiple_param_attr(self, length):
  66. param_attr = self.param_attr
  67. if isinstance(param_attr, ParamAttr):
  68. param_attr = [param_attr]
  69. if len(param_attr) != 1 and len(param_attr) != length:
  70. raise ValueError("parameter number mismatch")
  71. elif len(param_attr) == 1 and length != 1:
  72. tmp = [None] * length
  73. for i in range(length):
  74. tmp[i] = copy.deepcopy(param_attr[0])
  75. param_attr = tmp
  76. return param_attr
  77. def iter_inputs_and_params(self, input_param_name='input'):
  78. inputs = self.multiple_input(input_param_name)
  79. param_attrs = self.multiple_param_attr(len(inputs))
  80. yield from zip(inputs, param_attrs)
  81. def input_dtype(self, input_param_name='input'):
  82. inputs = self.multiple_input(input_param_name)
  83. dtype = None
  84. for each in inputs:
  85. if dtype is None:
  86. dtype = each.dtype
  87. elif dtype != each.dtype:
  88. raise ValueError(
  89. "Data Type mismatch: %d to %d" % (dtype, each.dtype)
  90. )
  91. return dtype
  92. def get_parameter(self, name):
  93. param = self.main_program.global_block().var(name)
  94. if not isinstance(param, Parameter):
  95. raise ValueError("no Parameter name %s found" % name)
  96. return param
  97. # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of bias_attr
  98. def append_bias_op(self, input_var, dim_start=1, dim_end=None):
  99. """
  100. Append bias operator and return its output. If the user does not set
  101. bias_attr, append_bias_op will return input_var
  102. :param input_var: the input variable. The len(input_var.shape) is
  103. larger or equal than 2.
  104. :bias_initializer: an instance of a subclass of Initializer used to
  105. initialize the bias
  106. :param dim_start:
  107. :param dim_end: the shape of the bias will be
  108. input_var.shape[dim_start:dim_end]. The bias is broadcasted to other
  109. dimensions and added to input_var to get the output
  110. """
  111. size = list(input_var.shape[dim_start:dim_end])
  112. bias_attr = self.bias_attr
  113. if not bias_attr:
  114. return input_var
  115. b = self.create_parameter(
  116. attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True
  117. )
  118. if in_pir_mode():
  119. return input_var + b
  120. tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
  121. self.append_op(
  122. type='elementwise_add',
  123. inputs={'X': [input_var], 'Y': [b]},
  124. outputs={'Out': [tmp]},
  125. attrs={'axis': dim_start},
  126. )
  127. return tmp
  128. # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of act
  129. def append_activation(self, input_var):
  130. act = self.kwargs.get('act', None)
  131. if act is None:
  132. return input_var
  133. if isinstance(act, str):
  134. act = {'type': act}
  135. else:
  136. raise TypeError(str(act) + " should be unicode or str")
  137. use_cudnn = None
  138. if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'):
  139. use_cudnn = self.kwargs.get('use_cudnn')
  140. act['use_cudnn'] = use_cudnn
  141. act_type = act.pop('type')
  142. if in_dygraph_mode():
  143. res = _append_activation_in_dygraph(input_var, act_type, use_cudnn)
  144. return res
  145. elif in_pir_mode():
  146. def _append_activation_in_pir(input, act=None, use_cudnn=None):
  147. if act is None:
  148. return input
  149. attrs = ()
  150. if use_cudnn:
  151. attrs = ('use_cudnn', use_cudnn)
  152. act_op = getattr(_C_ops, act)
  153. if act == 'softmax':
  154. return act_op(input, -1)
  155. return act_op(input, *attrs)
  156. return _append_activation_in_pir(input_var, act_type, use_cudnn)
  157. else:
  158. tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
  159. self.append_op(
  160. type=act_type,
  161. inputs={"X": [input_var]},
  162. outputs={"Out": [tmp]},
  163. attrs=act,
  164. )
  165. return tmp
  166. # TODO (jiabin): should we remove this since it has never be used
  167. def _get_default_initializer(self, dtype):
  168. if dtype is None or dtype_is_floating(dtype) is True:
  169. return paddle.nn.initializer.XavierUniform()
  170. else:
  171. # For integer and boolean types, initialize with all zeros
  172. return paddle.nn.initializer.Constant()
  173. # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs
  174. def is_instance(self, param_name, cls):
  175. param = self.kwargs.get(param_name, None)
  176. if not isinstance(param, cls):
  177. raise TypeError(
  178. "The input {0} parameter of method {1} must be {2}",
  179. param_name,
  180. self.layer_type,
  181. cls.__name__,
  182. )