poisson.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from collections.abc import Sequence
  15. import paddle
  16. from paddle.distribution import distribution
  17. class Poisson(distribution.Distribution):
  18. r"""
  19. The Poisson distribution with occurrence rate parameter: `rate`.
  20. In probability theory and statistics, the Poisson distribution is the most basic discrete probability
  21. distribution defined on the nonnegative integer set, which is used to describe the probability distribution of the number of random
  22. events occurring per unit time.
  23. The probability mass function (pmf) is
  24. .. math::
  25. pmf(x; \lambda) = \frac{e^{-\lambda} \cdot \lambda^x}{x!}
  26. In the above equation:
  27. * :math:`rate = \lambda`: is the mean occurrence rate.
  28. Args:
  29. rate(int|float|Tensor): The mean occurrence rate of Poisson distribution which should be greater than 0, meaning the expected occurrence
  30. times of an event in a fixed time interval. If the input data type is int or float, the data type of `rate` will be converted to a
  31. 1-D Tensor with paddle global default dtype.
  32. Examples:
  33. .. code-block:: python
  34. >>> import paddle
  35. >>> from paddle.distribution import Poisson
  36. >>> paddle.set_device('cpu')
  37. >>> paddle.seed(100)
  38. >>> rv = Poisson(paddle.to_tensor(30.0))
  39. >>> print(rv.sample([3]))
  40. Tensor(shape=[3, 1], dtype=float32, place=Place(cpu), stop_gradient=True,
  41. [[35.],
  42. [35.],
  43. [30.]])
  44. >>> print(rv.mean)
  45. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  46. 30.)
  47. >>> print(rv.entropy())
  48. Tensor(shape=[1], dtype=float32, place=Place(cpu), stop_gradient=True,
  49. [3.11671066])
  50. >>> rv1 = Poisson(paddle.to_tensor([[30.,40.],[8.,5.]]))
  51. >>> rv2 = Poisson(paddle.to_tensor([[1000.,40.],[7.,10.]]))
  52. >>> print(rv1.kl_divergence(rv2))
  53. Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  54. [[864.80285645, 0. ],
  55. [0.06825157 , 1.53426421 ]])
  56. """
  57. def __init__(self, rate):
  58. self.dtype = paddle.get_default_dtype()
  59. self.rate = self._to_tensor(rate)
  60. if not self._check_constraint(self.rate):
  61. raise ValueError(
  62. 'Every element of input parameter `rate` should be nonnegative.'
  63. )
  64. if self.rate.shape == []:
  65. batch_shape = (1,)
  66. else:
  67. batch_shape = self.rate.shape
  68. super().__init__(batch_shape)
  69. def _to_tensor(self, rate):
  70. """Convert the input parameters into tensors.
  71. Returns:
  72. Tensor: converted rate.
  73. """
  74. # convert type
  75. if isinstance(rate, (float, int)):
  76. rate = paddle.to_tensor([rate], dtype=self.dtype)
  77. else:
  78. self.dtype = rate.dtype
  79. return rate
  80. def _check_constraint(self, value):
  81. """Check the constraint for input parameters
  82. Args:
  83. value (Tensor)
  84. Returns:
  85. bool: pass or not.
  86. """
  87. return (value >= 0).all()
  88. @property
  89. def mean(self):
  90. """Mean of poisson distribution.
  91. Returns:
  92. Tensor: mean value.
  93. """
  94. return self.rate
  95. @property
  96. def variance(self):
  97. """Variance of poisson distribution.
  98. Returns:
  99. Tensor: variance value.
  100. """
  101. return self.rate
  102. def sample(self, shape=()):
  103. """Generate poisson samples of the specified shape. The final shape would be ``shape+batch_shape`` .
  104. Args:
  105. shape (Sequence[int], optional): Prepended shape of the generated samples.
  106. Returns:
  107. Tensor: Sampled data with shape `sample_shape` + `batch_shape`.
  108. """
  109. if not isinstance(shape, Sequence):
  110. raise TypeError('sample shape must be Sequence object.')
  111. shape = tuple(shape)
  112. batch_shape = tuple(self.batch_shape)
  113. output_shape = tuple(shape + batch_shape)
  114. output_rate = paddle.broadcast_to(self.rate, shape=output_shape)
  115. with paddle.no_grad():
  116. return paddle.poisson(output_rate)
  117. def entropy(self):
  118. r"""Shannon entropy in nats.
  119. The entropy is
  120. .. math::
  121. \mathcal{H}(X) = - \sum_{x \in \Omega} p(x) \log{p(x)}
  122. In the above equation:
  123. * :math:`\Omega`: is the support of the distribution.
  124. Returns:
  125. Tensor: Shannon entropy of poisson distribution. The data type is the same as `rate`.
  126. """
  127. values = self._enumerate_bounded_support(self.rate).reshape(
  128. (-1,) + (1,) * len(self.batch_shape)
  129. )
  130. log_prob = self.log_prob(values)
  131. proposed = -(paddle.exp(log_prob) * log_prob).sum(0)
  132. mask = paddle.cast(
  133. paddle.not_equal(
  134. self.rate, paddle.to_tensor(0.0, dtype=self.dtype)
  135. ),
  136. dtype=self.dtype,
  137. )
  138. return paddle.multiply(proposed, mask)
  139. def _enumerate_bounded_support(self, rate):
  140. """Generate a bounded approximation of the support. Approximately view Poisson r.v. as a
  141. Normal r.v. with mu = rate and sigma = sqrt(rate). Then by 30-sigma rule, generate a bounded
  142. approximation of the support.
  143. Args:
  144. rate (float): rate of one poisson r.v.
  145. Returns:
  146. Tensor: the bounded approximation of the support
  147. """
  148. s_max = (
  149. paddle.sqrt(paddle.max(rate))
  150. if paddle.greater_equal(
  151. paddle.max(rate), paddle.to_tensor(1.0, dtype=self.dtype)
  152. )
  153. else paddle.ones_like(rate, dtype=self.dtype)
  154. )
  155. upper = paddle.max(paddle.cast(rate + 30 * s_max, dtype="int32"))
  156. values = paddle.arange(0, upper, dtype=self.dtype)
  157. return values
  158. def log_prob(self, value):
  159. """Log probability density/mass function.
  160. Args:
  161. value (Tensor): The input tensor.
  162. Returns:
  163. Tensor: log probability. The data type is the same as `rate`.
  164. """
  165. value = paddle.cast(value, dtype=self.dtype)
  166. if not self._check_constraint(value):
  167. raise ValueError(
  168. 'Every element of input parameter `value` should be nonnegative.'
  169. )
  170. eps = paddle.finfo(self.rate.dtype).eps
  171. return paddle.nan_to_num(
  172. (
  173. -self.rate
  174. + value * paddle.log(self.rate)
  175. - paddle.lgamma(value + 1)
  176. ),
  177. neginf=-eps,
  178. )
  179. def prob(self, value):
  180. """Probability density/mass function.
  181. Args:
  182. value (Tensor): The input tensor.
  183. Returns:
  184. Tensor: probability. The data type is the same as `rate`.
  185. """
  186. return paddle.exp(self.log_prob(value))
  187. def kl_divergence(self, other):
  188. r"""The KL-divergence between two poisson distributions with the same `batch_shape`.
  189. The probability density function (pdf) is
  190. .. math::
  191. KL\_divergence\lambda_1, \lambda_2) = \sum_x p_1(x) \log{\frac{p_1(x)}{p_2(x)}}
  192. .. math::
  193. p_1(x) = \frac{e^{-\lambda_1} \cdot \lambda_1^x}{x!}
  194. .. math::
  195. p_2(x) = \frac{e^{-\lambda_2} \cdot \lambda_2^x}{x!}
  196. Args:
  197. other (Poisson): instance of ``Poisson``.
  198. Returns:
  199. Tensor, kl-divergence between two poisson distributions. The data type is the same as `rate`.
  200. """
  201. if self.batch_shape != other.batch_shape:
  202. raise ValueError(
  203. "KL divergence of two poisson distributions should share the same `batch_shape`."
  204. )
  205. rate_max = paddle.max(paddle.maximum(self.rate, other.rate))
  206. support_max = self._enumerate_bounded_support(rate_max)
  207. a_max = paddle.max(support_max)
  208. common_support = paddle.arange(0, a_max, dtype=self.dtype).reshape(
  209. (-1,) + (1,) * len(self.batch_shape)
  210. )
  211. log_prob_1 = self.log_prob(common_support)
  212. log_prob_2 = other.log_prob(common_support)
  213. return (paddle.exp(log_prob_1) * (log_prob_1 - log_prob_2)).sum(0)