geometric.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numbers
  15. import numpy as np
  16. import paddle
  17. from paddle.base import framework
  18. from paddle.distribution import distribution
  19. class Geometric(distribution.Distribution):
  20. r"""
  21. Geometric distribution parameterized by probs.
  22. In probability theory and statistics, the geometric distribution is one of
  23. discrete probability distributions, parameterized by one positive shape parameter, denoted by probs.
  24. In n Bernoulli trials, it takes k+1 trials to get the probability of success for the first time.
  25. In detail, it is: the probability that the first k times failed and the kth time succeeded.
  26. The geometric distribution is a special case of the Pascal distribution when r=1.
  27. The probability mass function (pmf) is
  28. .. math::
  29. Pr(Y=k)=(1-p)^kp
  30. where k is number of trials failed before seeing a success, and p is probability of success for each trial and k=0,1,2,3,4..., p belong to (0,1].
  31. Args:
  32. probs (Real|Tensor): Probability parameter.
  33. The value of probs must be positive. When the parameter is a tensor, probs is probability of success for each trial.
  34. Returns:
  35. Geometric distribution for instantiation of probs.
  36. Examples:
  37. .. code-block:: python
  38. >>> import paddle
  39. >>> from paddle.distribution import Geometric
  40. >>> geom = Geometric(0.5)
  41. >>> print(geom.mean)
  42. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  43. 1.)
  44. >>> print(geom.variance)
  45. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  46. 2.)
  47. >>> print(geom.stddev)
  48. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  49. 1.41421354)
  50. """
  51. def __init__(self, probs):
  52. if isinstance(probs, (numbers.Real, paddle.Tensor, framework.Variable)):
  53. if isinstance(probs, numbers.Real):
  54. probs = paddle.full(
  55. shape=(), fill_value=probs, dtype=paddle.float32
  56. )
  57. all_ones = paddle.full(
  58. shape=probs.shape, fill_value=1, dtype=probs.dtype
  59. )
  60. all_zeros = paddle.full(
  61. shape=probs.shape, fill_value=0, dtype=probs.dtype
  62. )
  63. all_false = paddle.full(
  64. shape=probs.shape, fill_value=False, dtype=bool
  65. )
  66. lessthen_0 = probs <= all_zeros
  67. morethen_1 = probs > all_ones
  68. else:
  69. raise TypeError(
  70. f"Expected type of probs is Number.Real|Tensor|framework.Variable, but got {type(probs)}"
  71. )
  72. if paddle.equal_all(lessthen_0, all_false) and paddle.equal_all(
  73. morethen_1, all_false
  74. ):
  75. batch_shape = tuple(probs.shape)
  76. else:
  77. raise ValueError(
  78. "Expected parameter probs of distribution Geometric to satisfy the"
  79. "constraint Interval(lower_bound=0.0, upper_bound=1.0)"
  80. )
  81. self.probs = probs
  82. super().__init__(batch_shape)
  83. @property
  84. def mean(self):
  85. """Mean of geometric distribution."""
  86. return 1.0 / self.probs - 1.0
  87. @property
  88. def variance(self):
  89. """Variance of geometric distribution."""
  90. return paddle.to_tensor(
  91. (1.0 / self.probs - 1.0) / self.probs,
  92. dtype=self.probs.dtype,
  93. )
  94. @property
  95. def stddev(self):
  96. """Standard deviation of Geometric distribution."""
  97. return paddle.sqrt(self.variance)
  98. def pmf(self, k):
  99. r"""Probability mass function evaluated at k.
  100. .. math::
  101. P(X=k) = (1-p)^{k} p, \quad k=0,1,2,3,\ldots
  102. Args:
  103. k (int): Value to be evaluated.
  104. Returns:
  105. Tensor: Probability.
  106. Examples:
  107. .. code-block:: python
  108. >>> import paddle
  109. >>> from paddle.distribution import Geometric
  110. >>> geom = Geometric(0.5)
  111. >>> print(geom.pmf(2))
  112. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  113. 0.12500000)
  114. """
  115. if isinstance(k, (numbers.Integral, framework.Variable)):
  116. return paddle.pow((1.0 - self.probs), k) * self.probs
  117. else:
  118. raise TypeError(
  119. f"Expected type of k is number.Real|framework.Variable, but got {type(k)}"
  120. )
  121. def log_pmf(self, k):
  122. r"""Log probability mass function evaluated at k.
  123. .. math::
  124. \log P(X = k) = \log(1-p)^k p
  125. Args:
  126. k (int): Value to be evaluated.
  127. Returns:
  128. Tensor: Log probability.
  129. Examples:
  130. .. code-block:: python
  131. >>> import paddle
  132. >>> from paddle.distribution import Geometric
  133. >>> geom = Geometric(0.5)
  134. >>> print(geom.log_pmf(2))
  135. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  136. -2.07944131)
  137. """
  138. if isinstance(k, (numbers.Integral, framework.Variable)):
  139. return paddle.log(self.pmf(k))
  140. else:
  141. raise TypeError(
  142. f"Expected type of k is number.Real|framework.Variable, but got {type(k)}"
  143. )
  144. def sample(self, shape=()):
  145. """Sample from Geometric distribution with sample shape.
  146. Args:
  147. shape (tuple(int)): Sample shape.
  148. Returns:
  149. Sampled data with shape `sample_shape` + `batch_shape` + `event_shape`.
  150. Examples:
  151. .. code-block:: python
  152. >>> import paddle
  153. >>> from paddle.distribution import Geometric
  154. >>> paddle.seed(2023)
  155. >>> geom = Geometric(0.5)
  156. >>> print(geom.sample((2,2)))
  157. Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  158. [[0., 0.],
  159. [1., 0.]])
  160. """
  161. with paddle.no_grad():
  162. return self.rsample(shape)
  163. def rsample(self, shape=()):
  164. """Generate samples of the specified shape.
  165. Args:
  166. shape(tuple(int)): The shape of generated samples.
  167. Returns:
  168. Tensor: A sample tensor that fits the Geometric distribution.
  169. Examples:
  170. .. code-block:: python
  171. >>> import paddle
  172. >>> from paddle.distribution import Geometric
  173. >>> paddle.seed(2023)
  174. >>> geom = Geometric(0.5)
  175. >>> print(geom.rsample((2,2)))
  176. Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
  177. [[0., 0.],
  178. [1., 0.]])
  179. """
  180. shape = distribution.Distribution._extend_shape(
  181. self, sample_shape=shape
  182. )
  183. uniform = paddle.uniform(
  184. shape=shape,
  185. min=float(np.finfo(dtype='float32').tiny),
  186. max=1.0,
  187. dtype=self.probs.dtype,
  188. )
  189. return paddle.floor(paddle.log(uniform) / paddle.log1p(-(self.probs)))
  190. def entropy(self):
  191. r"""Entropy of dirichlet distribution.
  192. .. math::
  193. H(X) = -\left[\frac{1}{p} \log p + \frac{1-p}{p^2} \log (1-p) \right]
  194. Returns:
  195. Tensor: Entropy.
  196. Examples:
  197. .. code-block:: python
  198. >>> import paddle
  199. >>> from paddle.distribution import Geometric
  200. >>> geom = Geometric(0.5)
  201. >>> print(geom.entropy())
  202. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  203. 1.38629425)
  204. """
  205. x = (1.0 - self.probs) * paddle.log(1.0 - self.probs)
  206. y = self.probs * paddle.log(self.probs)
  207. return -(x + y) / self.probs
  208. def cdf(self, k):
  209. r"""Cdf of geometric distribution.
  210. .. math::
  211. F(X \leq k) = 1 - (1-p)^(k+1), \quad k=0,1,2,\ldots
  212. Args:
  213. k: The number of trials performed.
  214. Returns:
  215. Tensor: Entropy.
  216. Examples:
  217. .. code-block:: python
  218. >>> import paddle
  219. >>> from paddle.distribution import Geometric
  220. >>> geom = Geometric(0.5)
  221. >>> print(geom.cdf(4))
  222. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  223. 0.96875000)
  224. """
  225. if isinstance(k, (numbers.Integral, framework.Variable)):
  226. return 1.0 - paddle.pow((1.0 - self.probs), k + 1)
  227. else:
  228. raise TypeError(
  229. f"Expected type of k is number.Real|framework.Variable, but got {type(k)}"
  230. )
  231. def kl_divergence(self, other):
  232. r"""Calculate the KL divergence KL(self || other) with two Geometric instances.
  233. .. math::
  234. KL(P \| Q) = \frac{p}{q} \log \frac{p}{q} + \log (1-p) - \log (1-q)
  235. Args:
  236. other (Geometric): An instance of Geometric.
  237. Returns:
  238. Tensor: The kl-divergence between two geometric distributions.
  239. Examples:
  240. .. code-block:: python
  241. >>> import paddle
  242. >>> from paddle.distribution import Geometric
  243. >>> geom_p = Geometric(0.5)
  244. >>> geom_q = Geometric(0.1)
  245. >>> print(geom_p.kl_divergence(geom_q))
  246. Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
  247. 0.51082563)
  248. """
  249. if isinstance(other, Geometric):
  250. p, q = self.probs, other.probs
  251. return p * paddle.log(p / q) + (1.0 - p) * paddle.log(
  252. (1.0 - p) / (1.0 - q)
  253. )
  254. else:
  255. raise TypeError(
  256. f"Exacted type of other is geometric.Geometric, but got {type(other)}"
  257. )