initializer.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import functools
  15. import math
  16. import numpy as np
  17. from ...base.framework import (
  18. EagerParamBase,
  19. default_main_program,
  20. in_dygraph_mode,
  21. )
  22. from .lazy_init import lazy_init_helper
  23. __all__ = []
  class Initializer:
      """Base class for parameter initializers.

      Defines the common interface of parameter initializers. They add
      operations to the init program that are used to initialize a
      parameter. Users should not use this class directly, but need to use
      one of its implementations, which must override :meth:`forward`.
      """

      def __init__(self):
          pass

      def __call__(self, param, block=None):
          """Initialize ``param`` now, or register it for lazy initialization.

          Args:
              param: the parameter to initialize.
              block: optional block forwarded unchanged to :meth:`forward`
                  or :meth:`_lazy_init`.

          Returns:
              The result of :meth:`forward` when ``lazy_init_helper().state``
              is falsy, otherwise the result of :meth:`_lazy_init`.
          """
          if not lazy_init_helper().state:
              return self.forward(param, block)

          return self._lazy_init(param, block)

      def forward(self, param, block=None):
          """Add corresponding initialization operations to the network.

          Raises:
              NotImplementedError: always; subclasses provide the real logic.
          """
          raise NotImplementedError()

      def _lazy_init(self, param, block=None):
          """Apply lazy initialization.

          Instead of running :meth:`forward`, attach it to ``param`` so it
          can be run later, and return ``param`` itself. Asserts that
          ``in_dygraph_mode()`` is true.
          """
          assert in_dygraph_mode()

          def init_op_creator(forward, param, block):
              new_var = param._to_static_var(True, block=block)
              # Record initializer operator
              with lazy_init_helper():
                  forward(new_var, block)

          # Add hook function for initializing param in dygraph mode
          param.set_init_func(functools.partial(self.forward))
          param._init_op_creator = functools.partial(
              init_op_creator, self.forward
          )

          return param

      def _check_block(self, block):
          """Return ``block``, or the default main program's global block
          when ``block`` is None."""
          if block is None:
              block = default_main_program().global_block()

          return block

      def _compute_fans(self, var):
          """Compute the fan_in and the fan_out for layers.

          This method computes the fan_in and the fan_out for neural
          network layers, if not specified. It is not possible to perfectly
          estimate fan_in and fan_out. This method will estimate it
          correctly for matrix multiply and convolutions.

          Args:
              var: variable for which fan_in and fan_out have to be computed.

          Returns:
              tuple of two integers (fan_in, fan_out).
          """
          # Distributed eager parameters report fans from their local shape.
          shape = (
              var._local_shape
              if (isinstance(var, EagerParamBase) and var.is_dist())
              else var.shape
          )

          if not shape:
              # Empty or missing shape: treat as a scalar.
              fan_in = fan_out = 1
          elif len(shape) == 1:
              fan_in = fan_out = shape[0]
          elif len(shape) == 2:
              # This is the case for simple matrix multiply
              fan_in = shape[0]
              fan_out = shape[1]
          else:
              # Assume this to be a convolutional kernel
              # In PaddlePaddle, the shape of the kernel is like:
              # [num_filters, num_filter_channels, ...] where the remaining
              # dimensions are the filter_size
              # np.prod returns a NumPy scalar; convert it so this branch
              # yields plain Python integers like the other branches.
              receptive_field_size = int(np.prod(shape[2:]))
              fan_in = shape[1] * receptive_field_size
              fan_out = shape[0] * receptive_field_size

          return (fan_in, fan_out)
  def calculate_gain(nonlinearity, param=None):
      """
      Get the recommended ``gain`` value of some nonlinearity function. ``gain`` value can be used in some
      ``paddle.nn.initializer`` api to adjust the initialization value.

      Args:
          nonlinearity(str): name of nonlinearity activation function. If it is a linear function, such as:
              `linear/conv1d/conv2d/conv3d/conv1d_transpose/conv2d_transpose/conv3d_transpose` , 1.0 will be returned.
          param(bool|int|float, optional): optional parameter for some nonlinearity function. Now, it only applies to
              'leaky_relu'. Default: None, it will be calculated as 0.01 in the formula.

      Returns:
          A float value, which is the recommended gain for this nonlinearity function.

      Raises:
          AssertionError: if ``param`` is given but is not a bool, int or float.
          ValueError: if ``nonlinearity`` is not one of the supported names.

      Examples:
          .. code-block:: python

              >>> import paddle
              >>> gain = paddle.nn.initializer.calculate_gain('tanh')
              >>> print(gain)
              1.6666666666666667
              >>> # 5.0 / 3
              >>> gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0)
              >>> print(gain)
              1.0
              >>> # math.sqrt(2.0 / (1+param^2))
              >>> initializer = paddle.nn.initializer.Orthogonal(gain)
      """
      if param is None:
          param = 0.01
      elif isinstance(param, (bool, int, float)):
          param = float(param)
      else:
          # Raised explicitly (instead of via `assert`) so the check is not
          # stripped under `python -O`; the exception type stays the same.
          raise AssertionError(
              "param must be a bool, int or float, "
              f"but received {type(param).__name__}."
          )

      recommended_gain = {
          'sigmoid': 1,
          'linear': 1,
          'conv1d': 1,
          'conv2d': 1,
          'conv3d': 1,
          'conv1d_transpose': 1,
          'conv2d_transpose': 1,
          'conv3d_transpose': 1,
          'tanh': 5.0 / 3,
          'relu': math.sqrt(2.0),
          'leaky_relu': math.sqrt(2.0 / (1 + param**2)),
          'selu': 3.0 / 4,
      }

      if nonlinearity in recommended_gain:
          return recommended_gain[nonlinearity]

      raise ValueError(
          f"nonlinearity function {nonlinearity} is not supported now."
      )